DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
@ 2019-07-10  9:29 Olivier Matz
  2019-07-10 17:14 ` Wang, Haiyue
                   ` (7 more replies)
  0 siblings, 8 replies; 64+ messages in thread
From: Olivier Matz @ 2019-07-10  9:29 UTC (permalink / raw)
  To: dev

Many features need to store data inside the mbuf. As the room in mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
  given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e. when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags for now.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test/test_mbuf.c                 |  83 +++++++-
 lib/librte_mbuf/Makefile             |   2 +
 lib/librte_mbuf/meson.build          |   6 +-
 lib/librte_mbuf/rte_mbuf.h           |  25 ++-
 lib/librte_mbuf/rte_mbuf_dyn.c       | 373 +++++++++++++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf_dyn.h       | 119 +++++++++++
 lib/librte_mbuf/rte_mbuf_version.map |   4 +
 7 files changed, 607 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 2a97afe20..8008cc766 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -28,6 +28,7 @@
 #include <rte_random.h>
 #include <rte_cycles.h>
 #include <rte_malloc.h>
+#include <rte_mbuf_dyn.h>
 
 #include "test.h"
 
@@ -502,7 +503,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
 		rte_pktmbuf_free(clone2);
 	return -1;
 }
-#undef GOTO_FAIL
 
 /*
  * test allocation and free of mbufs
@@ -1122,6 +1122,81 @@ test_tx_offload(void)
 }
 
 static int
+test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
+{
+	struct rte_mbuf *m = NULL;
+	int offset, offset2;
+	int flag, flag2;
+
+	offset = rte_mbuf_dynfield_register("test-dynfield", sizeof(uint8_t),
+					__alignof__(uint8_t), 0);
+	if (offset == -1)
+		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
+			offset, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register("test-dynfield", sizeof(uint8_t),
+					__alignof__(uint8_t), 0);
+	if (offset2 != offset)
+		GOTO_FAIL("failed to lookup dynamic field, offset=%d, offset2=%d: %s",
+			offset, offset2, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register("test-dynfield2", sizeof(uint16_t),
+					__alignof__(uint16_t), 0);
+	if (offset2 == -1 || offset2 == offset || (offset & 1))
+		GOTO_FAIL("failed to register dynfield field 2, offset=%d, offset2=%d: %s",
+			offset, offset2, strerror(errno));
+
+	printf("offset = %d, offset2 = %d\n", offset, offset2);
+
+	offset = rte_mbuf_dynfield_register("test-dynfield-fail", 256, 1, 0);
+	if (offset != -1)
+		GOTO_FAIL("dynamic field creation should fail (too big)");
+
+	offset = rte_mbuf_dynfield_register("test-dynfield-fail", 1, 3, 0);
+	if (offset != -1)
+		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
+
+	flag = rte_mbuf_dynflag_register("test-dynflag");
+	if (flag == -1)
+		GOTO_FAIL("failed to register dynamic field, flag=%d: %s",
+			flag, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register("test-dynflag");
+	if (flag2 != flag)
+		GOTO_FAIL("failed to lookup dynamic field, flag=%d, flag2=%d: %s",
+			flag, flag2, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register("test-dynflag2");
+	if (flag2 == -1 || flag2 == flag)
+		GOTO_FAIL("failed to register dynflag field 2, flag=%d, flag2=%d: %s",
+			flag, flag2, strerror(errno));
+
+	printf("flag = %d, flag2 = %d\n", flag, flag2);
+
+	/* set, get dynamic field */
+	m = rte_pktmbuf_alloc(pktmbuf_pool);
+	if (m == NULL)
+		GOTO_FAIL("Cannot allocate mbuf");
+
+	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
+	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
+		GOTO_FAIL("failed to read dynamic field");
+	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
+	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
+		GOTO_FAIL("failed to read dynamic field");
+
+	/* set a dynamic flag */
+	m->ol_flags |= (1ULL << flag);
+
+	rte_pktmbuf_free(m);
+	return 0;
+fail:
+	rte_pktmbuf_free(m);
+	return -1;
+}
+#undef GOTO_FAIL
+
+static int
 test_mbuf(void)
 {
 	int ret = -1;
@@ -1140,6 +1215,12 @@ test_mbuf(void)
 		goto err;
 	}
 
+	/* test registration of dynamic fields and flags */
+	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
+		printf("mbuf dynflag test failed\n");
+		goto err;
+	}
+
 	/* create a specific pktmbuf pool with a priv_size != 0 and no data
 	 * room size */
 	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index c8f6d2689..5a9bcee73 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -17,8 +17,10 @@ LIBABIVER := 5
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 6cc11ebb4..9137e8f26 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,8 +2,10 @@
 # Copyright(c) 2017 Intel Corporation
 
 version = 5
-sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
-headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
+sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
+	'rte_mbuf_dyn.c')
+headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
+	'rte_mbuf_dyn.h')
 deps += ['mempool']
 
 allow_experimental_apis = true
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 98225ec80..ef588cd54 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -198,9 +198,12 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
 
-/* add new RX flags here */
+/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-/* add new TX flags here */
+#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_LAST_FREE (1ULL << 39)
+
+/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
 
 /**
  * Indicate that the metadata field in the mbuf is in use.
@@ -738,6 +741,8 @@ struct rte_mbuf {
 	 */
 	struct rte_mbuf_ext_shared_info *shinfo;
 
+	uint64_t dynfield1; /**< Reserved for dynamic fields. */
+	uint64_t dynfield2; /**< Reserved for dynamic fields. */
 } __rte_cache_aligned;
 
 /**
@@ -1685,6 +1690,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
 /**
+ * Copy dynamic fields from m_src to m_dst.
+ *
+ * @param m_dst
+ *   The destination mbuf.
+ * @param m_src
+ *   The source mbuf.
+ */
+static inline void
+rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
+{
+	/* read both reserved areas of the source mbuf first ... */
+	const uint64_t area1 = m_src->dynfield1;
+	const uint64_t area2 = m_src->dynfield2;
+
+	/* ... then store them in the destination mbuf */
+	m_dst->dynfield1 = area1;
+	m_dst->dynfield2 = area2;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * If the mbuf we are attaching to isn't a direct buffer and is attached to
@@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 	mi->vlan_tci_outer = m->vlan_tci_outer;
 	mi->tx_offload = m->tx_offload;
 	mi->hash = m->hash;
+	rte_mbuf_dynfield_copy(mi, m);
 
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
new file mode 100644
index 000000000..6a96a43da
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.c
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
+
+#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
+
+struct mbuf_dynfield { /* description of one registered dynamic field */
+	TAILQ_ENTRY(mbuf_dynfield) next; /* NOTE(review): looks unused, the list links rte_tailq_entry - confirm */
+	char name[RTE_MBUF_DYN_NAMESIZE]; /* name compared by the lookup */
+	size_t size; /* number of reserved bytes */
+	size_t align; /* alignment constraint given at registration */
+	unsigned int flags; /* flags given at registration */
+	int offset; /* offset of the field in struct rte_mbuf */
+};
+TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry); /* head type used with RTE_TAILQ_CAST() */
+
+static struct rte_tailq_elem mbuf_dynfield_tailq = {
+	.name = "RTE_MBUF_DYNFIELD",
+};
+EAL_REGISTER_TAILQ(mbuf_dynfield_tailq); /* its head holds the registered fields */
+
+struct mbuf_dynflag { /* description of one registered dynamic flag */
+	TAILQ_ENTRY(mbuf_dynflag) next; /* NOTE(review): looks unused, the list links rte_tailq_entry - confirm */
+	char name[RTE_MBUF_DYN_NAMESIZE]; /* name compared by the lookup */
+	int bitnum; /* number of the bit reserved for this flag */
+};
+TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry); /* head type used with RTE_TAILQ_CAST() */
+
+static struct rte_tailq_elem mbuf_dynflag_tailq = {
+	.name = "RTE_MBUF_DYNFLAG",
+};
+EAL_REGISTER_TAILQ(mbuf_dynflag_tailq); /* its head holds the registered flags */
+
+struct mbuf_dyn_shm { /* allocation state kept in the RTE_MBUF_DYN_MZNAME memzone */
+	/** For each mbuf byte, free_space[i] != 0 if the byte is free
+	 * (init_shared_mem() marks free bytes with 0xff, registering a field
+	 * clears its bytes to 0). */
+	uint8_t free_space[sizeof(struct rte_mbuf)];
+	/** Bitfield of available flags: bit n is set while flag n is free. */
+	uint64_t free_flags;
+};
+static struct mbuf_dyn_shm *shm; /* set by init_shared_mem(), NULL before */
+
+/*
+ * Map the shared allocation state into this process and point shm to it.
+ * The primary process reserves the memzone and initializes its content,
+ * the other processes only look it up. Returns 0 on success, -1 if the
+ * memzone cannot be reserved or found.
+ */
+static int
+init_shared_mem(void)
+{
+	const struct rte_memzone *mz;
+	uint64_t bit;
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
+	else
+		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
+						sizeof(struct mbuf_dyn_shm),
+						SOCKET_ID_ANY, 0,
+						RTE_CACHE_LINE_SIZE);
+	if (mz == NULL)
+		return -1;
+
+	shm = mz->addr;
+
+	/* only the primary process initializes the content */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+#define mark_free(field)						\
+	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
+		0xff, sizeof(((struct rte_mbuf *)0)->field))
+
+	/* the areas marked free here must be the ones copied by
+	 * rte_mbuf_dynfield_copy()
+	 */
+	memset(shm, 0, sizeof(*shm));
+	mark_free(dynfield1);
+	mark_free(dynfield2);
+#undef mark_free
+
+	/* every bit from PKT_FIRST_FREE to PKT_LAST_FREE starts as free */
+	bit = PKT_FIRST_FREE;
+	while (bit <= PKT_LAST_FREE) {
+		shm->free_flags |= bit;
+		bit <<= 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Tell whether 'size' bytes can be reserved at 'offset': the offset must
+ * honor 'align', the area must fit in struct rte_mbuf and all its bytes
+ * must be free. Returns 0 if the offset is usable, -1 otherwise.
+ */
+static int
+check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
+{
+	const size_t end = offset + size;
+	size_t pos;
+
+	(void)flags;
+
+	if ((offset & (align - 1)) != 0 || end > sizeof(struct rte_mbuf))
+		return -1;
+
+	for (pos = offset; pos < end; pos++) {
+		if (shm->free_space[pos] == 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Find a registered dynamic field by name. The caller must hold the tailq
+ * lock. Returns the field, or NULL with rte_errno set to ENOENT.
+ */
+static struct mbuf_dynfield *
+__mbuf_dynfield_lookup(const char *name)
+{
+	struct mbuf_dynfield_list *list;
+	struct rte_tailq_entry *entry;
+	struct mbuf_dynfield *field;
+
+	list = RTE_TAILQ_CAST(mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	TAILQ_FOREACH(entry, list, next) {
+		field = (struct mbuf_dynfield *)entry->data;
+		if (strncmp(name, field->name, RTE_MBUF_DYN_NAMESIZE) != 0)
+			continue;
+		return field;
+	}
+
+	/* end of the list reached without a match */
+	rte_errno = ENOENT;
+	return NULL;
+}
+
+int
+rte_mbuf_dynfield_lookup(const char *name, size_t *size, size_t *align)
+{
+	struct mbuf_dynfield *field = NULL;
+
+	/* without shm (set by init_shared_mem()) the field is reported as
+	 * not found
+	 */
+	if (shm != NULL) {
+		rte_mcfg_tailq_read_lock();
+		field = __mbuf_dynfield_lookup(name);
+		rte_mcfg_tailq_read_unlock();
+	}
+
+	if (field == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	/* both output parameters are optional */
+	if (size != NULL)
+		*size = field->size;
+	if (align != NULL)
+		*align = field->align;
+
+	return field->offset;
+}
+
+/*
+ * Register a dynamic field, or return the offset of an already registered
+ * one.
+ *
+ * name:  identifier of the field; it must fit in RTE_MBUF_DYN_NAMESIZE
+ *        bytes, terminating NUL included.
+ * size:  number of bytes to reserve, smaller than sizeof(struct rte_mbuf).
+ * align: alignment of the offset, must be a power of 2.
+ * flags: stored with the field and compared when the name is registered
+ *        again.
+ *
+ * Returns the offset of the field in struct rte_mbuf, or -1 on error. The
+ * error paths of this function set rte_errno to:
+ *   EINVAL       - invalid size or alignment
+ *   EEXIST       - name already registered with other parameters
+ *   EPERM        - new field requested from a non-primary process
+ *   ENOENT       - no free area matches the size and alignment
+ *   ENOMEM       - allocation of the list entry failed
+ *   ENAMETOOLONG - name does not fit in RTE_MBUF_DYN_NAMESIZE
+ * When init_shared_mem() fails, rte_errno is not modified here.
+ */
+int
+rte_mbuf_dynfield_register(const char *name, size_t size, size_t align,
+			unsigned int flags)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield *mbuf_dynfield = NULL;
+	struct mbuf_dynfield *existing;
+	struct rte_tailq_entry *te = NULL;
+	int offset, ret;
+	size_t i;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		goto fail;
+	if (size >= sizeof(struct rte_mbuf)) {
+		rte_errno = EINVAL;
+		goto fail;
+	}
+	if (!rte_is_power_of_2(align)) {
+		rte_errno = EINVAL;
+		goto fail;
+	}
+
+	rte_mcfg_tailq_write_lock();
+
+	/*
+	 * The result of the lookup is kept in its own variable: a found entry
+	 * stays linked in the list, so it must never reach the rte_free()
+	 * calls of the error path.
+	 */
+	existing = __mbuf_dynfield_lookup(name);
+	if (existing != NULL) {
+		if (existing->size != size ||
+				existing->align != align ||
+				existing->flags != flags) {
+			rte_errno = EEXIST;
+			goto fail_unlock;
+		}
+		offset = existing->offset;
+		goto out_unlock;
+	}
+
+	/* only the primary process is allowed to create a new field */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		goto fail_unlock;
+	}
+
+	/* first fit: take the lowest offset accepted by check_offset() */
+	for (offset = 0;
+	     offset < (int)sizeof(struct rte_mbuf);
+	     offset++) {
+		if (check_offset(offset, size, align, flags) == 0)
+			break;
+	}
+
+	if (offset == (int)sizeof(struct rte_mbuf)) {
+		rte_errno = ENOENT;
+		goto fail_unlock;
+	}
+
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	/* the failed lookup left ENOENT in rte_errno, report ENOMEM instead */
+	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		goto fail_unlock;
+	}
+
+	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
+	if (mbuf_dynfield == NULL) {
+		rte_errno = ENOMEM;
+		goto fail_unlock;
+	}
+
+	ret = strlcpy(mbuf_dynfield->name, name, sizeof(mbuf_dynfield->name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->name)) {
+		rte_errno = ENAMETOOLONG;
+		goto fail_unlock;
+	}
+	mbuf_dynfield->size = size;
+	mbuf_dynfield->align = align;
+	mbuf_dynfield->flags = flags;
+	mbuf_dynfield->offset = offset;
+	te->data = mbuf_dynfield;
+
+	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
+
+	/* the bytes of the new field are not free anymore */
+	for (i = offset; i < offset + size; i++)
+		shm->free_space[i] = 0;
+
+out_unlock:
+	rte_mcfg_tailq_write_unlock();
+
+	return offset;
+
+fail_unlock:
+	rte_mcfg_tailq_write_unlock();
+fail:
+	/* both pointers are either NULL or not yet linked in the list */
+	rte_free(mbuf_dynfield);
+	rte_free(te);
+	return -1;
+}
+
+/*
+ * Find a registered dynamic flag by name. The caller must hold the tailq
+ * lock. Returns the flag, or NULL with rte_errno set to ENOENT.
+ */
+static struct mbuf_dynflag *
+__mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag_list *list;
+	struct rte_tailq_entry *entry;
+	struct mbuf_dynflag *flag;
+
+	list = RTE_TAILQ_CAST(mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	TAILQ_FOREACH(entry, list, next) {
+		flag = (struct mbuf_dynflag *)entry->data;
+		if (strncmp(name, flag->name, RTE_MBUF_DYN_NAMESIZE) != 0)
+			continue;
+		return flag;
+	}
+
+	/* end of the list reached without a match */
+	rte_errno = ENOENT;
+	return NULL;
+}
+
+int
+rte_mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag *flag = NULL;
+
+	/* without shm (set by init_shared_mem()) the flag is reported as
+	 * not found
+	 */
+	if (shm != NULL) {
+		rte_mcfg_tailq_read_lock();
+		flag = __mbuf_dynflag_lookup(name);
+		rte_mcfg_tailq_read_unlock();
+	}
+
+	if (flag == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	return flag->bitnum;
+}
+
+/*
+ * Register a dynamic flag, or return the bit number of an already
+ * registered one.
+ *
+ * name: identifier of the flag; it must fit in RTE_MBUF_DYN_NAMESIZE bytes,
+ *       terminating NUL included.
+ *
+ * Returns the number of the reserved bit, or -1 on error. The error paths
+ * of this function set rte_errno to:
+ *   EPERM        - new flag requested from a non-primary process
+ *   ENOENT       - no free flag is left
+ *   ENOMEM       - allocation of the list entry failed
+ *   ENAMETOOLONG - name does not fit in RTE_MBUF_DYN_NAMESIZE
+ * When init_shared_mem() fails, rte_errno is not modified here.
+ */
+int
+rte_mbuf_dynflag_register(const char *name)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag *mbuf_dynflag = NULL;
+	struct rte_tailq_entry *te = NULL;
+	int bitnum, ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		goto fail;
+
+	rte_mcfg_tailq_write_lock();
+
+	/* an already registered name returns its bit, no error path follows */
+	mbuf_dynflag = __mbuf_dynflag_lookup(name);
+	if (mbuf_dynflag != NULL) {
+		bitnum = mbuf_dynflag->bitnum;
+		goto out_unlock;
+	}
+
+	/* only the primary process is allowed to create a new flag */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		goto fail_unlock;
+	}
+
+	if (shm->free_flags == 0) {
+		rte_errno = ENOENT;
+		goto fail_unlock;
+	}
+	/* take the lowest free bit */
+	bitnum = rte_bsf64(shm->free_flags);
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	/* the failed lookup left ENOENT in rte_errno, report ENOMEM instead */
+	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		goto fail_unlock;
+	}
+
+	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
+	if (mbuf_dynflag == NULL) {
+		rte_errno = ENOMEM;
+		goto fail_unlock;
+	}
+
+	ret = strlcpy(mbuf_dynflag->name, name, sizeof(mbuf_dynflag->name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->name)) {
+		rte_errno = ENAMETOOLONG;
+		goto fail_unlock;
+	}
+	mbuf_dynflag->bitnum = bitnum;
+	te->data = mbuf_dynflag;
+
+	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
+
+	/* the bit of the new flag is not free anymore */
+	shm->free_flags &= ~(1ULL << bitnum);
+
+out_unlock:
+	rte_mcfg_tailq_write_unlock();
+
+	return bitnum;
+
+fail_unlock:
+	rte_mcfg_tailq_write_unlock();
+fail:
+	/* both pointers are either NULL or not yet linked in the list */
+	rte_free(mbuf_dynflag);
+	rte_free(te);
+	return -1;
+}
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
new file mode 100644
index 000000000..a86986a0f
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#ifndef _RTE_MBUF_DYN_H_
+#define _RTE_MBUF_DYN_H_
+
+/**
+ * @file
+ * RTE Mbuf dynamic fields and flags
+ *
+ * Many features need to store data inside the mbuf. As the room in
+ * mbuf structure is limited, it is not possible to have a field for
+ * each feature. Also, changing fields in the mbuf structure can break
+ * the API or ABI.
+ *
+ * This module addresses this issue, by enabling the dynamic
+ * registration of fields or flags:
+ *
+ * - a dynamic field is a named area in the rte_mbuf structure, with a
+ *   given size (>= 1 byte) and alignment constraint.
+ * - a dynamic flag is a named bit in the rte_mbuf structure.
+ *
+ * The typical use case is a PMD that registers space for an offload
+ * feature, when the application requests to enable this feature.  As
+ * the space in mbuf is limited, the space should only be reserved if it
+ * is going to be used (i.e. when the application explicitly asks for it).
+ *
+ * The registration can be done at any moment, but it is not possible
+ * to unregister fields or flags for now.
+ *
+ * Example of use:
+ *
+ * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file
+ * - If the application asks for the feature, the PMD uses
+ *   rte_mbuf_dynfield_register() to get the dynamic offset and stores
+ *   it in a global variable.
+ * - The application also calls rte_mbuf_dynfield_register() to get the
+ *   dynamic offset and stores it in a global variable.
+ * - When the field must be used by the PMD or the application, they
+ *   use the RTE_MBUF_DYNFIELD() helper.
+ */
+
+struct rte_mbuf;
+
+/**
+ * Register space for a dynamic field in the mbuf structure.
+ *
+ * @param name
+ *   A string identifying the dynamic field. External applications or
+ *   libraries must not define identifiers prefixed with "rte_", which
+ *   are reserved for standard features.
+ * @param size
+ *   The number of bytes to reserve.
+ * @param align
+ *   The alignment constraint, which must be a power of 2.
+ * @param flags
+ *   Reserved for future use.
+ * @return
+ *   The offset in the mbuf structure, or -1 on error (rte_errno is set).
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register(const char *name, size_t size, size_t align,
+			unsigned int flags);
+
+/**
+ * Lookup for a registered dynamic mbuf field.
+ *
+ * @param name
+ *   A string identifying the dynamic field.
+ * @param size
+ *   If not NULL, the number of reserved bytes for this field is stored
+ *   at this address.
+ * @param align
+ *   If not NULL, the alignment constraint for this field is stored
+ *   at this address.
+ * @return
+ *   The offset of this field in the mbuf structure, or -1 on error
+ *   (rte_errno is set).
+ */
+__rte_experimental
+int rte_mbuf_dynfield_lookup(const char *name, size_t *size, size_t *align);
+
+/**
+ * Register a dynamic flag in the mbuf structure.
+ *
+ * @param name
+ *   A string identifying the dynamic flag. External applications or
+ *   libraries must not define identifiers prefixed with "rte_", which
+ *   are reserved for standard features.
+ * @return
+ *   The number of the reserved bit, or -1 on error (rte_errno is set).
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register(const char *name);
+
+/**
+ * Lookup for a registered dynamic mbuf flag.
+ *
+ * @param name
+ *   A string identifying the dynamic flag.
+ * @return
+ *   The number of the bit reserved for this flag, or -1 on error
+ *   (rte_errno is set).
+ */
+__rte_experimental
+int rte_mbuf_dynflag_lookup(const char *name);
+
+/**
+ * Helper macro to access a dynamic field.
+ */
+#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((char *)(m) + (offset)))
+
+/**
+ * Maximum length of the dynamic field or flag string.
+ */
+#define RTE_MBUF_DYN_NAMESIZE 32
+
+#endif
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index 2662a37bf..a98310570 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -50,4 +50,8 @@ EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_mbuf_dynfield_lookup;
+	rte_mbuf_dynfield_register;
+	rte_mbuf_dynflag_lookup;
+	rte_mbuf_dynflag_register;
 } DPDK_18.08;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
@ 2019-07-10 17:14 ` Wang, Haiyue
  2019-07-11  7:26   ` Olivier Matz
  2019-07-10 17:49 ` Stephen Hemminger
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-07-10 17:14 UTC (permalink / raw)
  To: Olivier Matz, dev

Hi,

Sounds cool, just have some questions inline.

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> Sent: Wednesday, July 10, 2019 17:29
> To: dev@dpdk.org
> Subject: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  app/test/test_mbuf.c                 |  83 +++++++-
>  lib/librte_mbuf/Makefile             |   2 +
>  lib/librte_mbuf/meson.build          |   6 +-
>  lib/librte_mbuf/rte_mbuf.h           |  25 ++-
>  lib/librte_mbuf/rte_mbuf_dyn.c       | 373 +++++++++++++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h       | 119 +++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map |   4 +
>  7 files changed, 607 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> 
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index 2a97afe20..8008cc766 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -28,6 +28,7 @@
>  #include <rte_random.h>
>  #include <rte_cycles.h>
>  #include <rte_malloc.h>
> +#include <rte_mbuf_dyn.h>
> 
>  #include "test.h"
> 
> @@ -502,7 +503,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
>  		rte_pktmbuf_free(clone2);
>  	return -1;
>  }
> -#undef GOTO_FAIL
> 
>  /*
>   * test allocation and free of mbufs
> @@ -1122,6 +1122,81 @@ test_tx_offload(void)
>  }
> 
>  static int
> +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
> +{
> +	struct rte_mbuf *m = NULL;
> +	int offset, offset2;
> +	int flag, flag2;
> +
> +	offset = rte_mbuf_dynfield_register("test-dynfield", sizeof(uint8_t),
> +					__alignof__(uint8_t), 0);
> +	if (offset == -1)
> +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> +			offset, strerror(errno));
> +
> +	offset2 = rte_mbuf_dynfield_register("test-dynfield", sizeof(uint8_t),
> +					__alignof__(uint8_t), 0);
> +	if (offset2 != offset)
> +		GOTO_FAIL("failed to lookup dynamic field, offset=%d, offset2=%d: %s",
> +			offset, offset2, strerror(errno));
> +
> +	offset2 = rte_mbuf_dynfield_register("test-dynfield2", sizeof(uint16_t),
> +					__alignof__(uint16_t), 0);
> +	if (offset2 == -1 || offset2 == offset || (offset & 1))
> +		GOTO_FAIL("failed to register dynfield field 2, offset=%d, offset2=%d: %s",
> +			offset, offset2, strerror(errno));
> +
> +	printf("offset = %d, offset2 = %d\n", offset, offset2);
> +
> +	offset = rte_mbuf_dynfield_register("test-dynfield-fail", 256, 1, 0);
> +	if (offset != -1)
> +		GOTO_FAIL("dynamic field creation should fail (too big)");
> +
> +	offset = rte_mbuf_dynfield_register("test-dynfield-fail", 1, 3, 0);
> +	if (offset != -1)
> +		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
> +
> +	flag = rte_mbuf_dynflag_register("test-dynflag");
> +	if (flag == -1)
> +		GOTO_FAIL("failed to register dynamic field, flag=%d: %s",
> +			flag, strerror(errno));
> +
> +	flag2 = rte_mbuf_dynflag_register("test-dynflag");
> +	if (flag2 != flag)
> +		GOTO_FAIL("failed to lookup dynamic field, flag=%d, flag2=%d: %s",
> +			flag, flag2, strerror(errno));
> +
> +	flag2 = rte_mbuf_dynflag_register("test-dynflag2");
> +	if (flag2 == -1 || flag2 == flag)
> +		GOTO_FAIL("failed to register dynflag field 2, flag=%d, flag2=%d: %s",
> +			flag, flag2, strerror(errno));
> +
> +	printf("flag = %d, flag2 = %d\n", flag, flag2);
> +
> +	/* set, get dynamic field */
> +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> +	if (m == NULL)
> +		GOTO_FAIL("Cannot allocate mbuf");
> +
> +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> +		GOTO_FAIL("failed to read dynamic field");
> +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> +		GOTO_FAIL("failed to read dynamic field");
> +
> +	/* set a dynamic flag */
> +	m->ol_flags |= (1ULL << flag);
> +
> +	rte_pktmbuf_free(m);
> +	return 0;
> +fail:
> +	rte_pktmbuf_free(m);
> +	return -1;
> +}
> +#undef GOTO_FAIL
> +
> +static int
>  test_mbuf(void)
>  {
>  	int ret = -1;
> @@ -1140,6 +1215,12 @@ test_mbuf(void)
>  		goto err;
>  	}
> 
> +	/* test registration of dynamic fields and flags */
> +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> +		printf("mbuf dynflag test failed\n");
> +		goto err;
> +	}
> +
>  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
>  	 * room size */
>  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
> index c8f6d2689..5a9bcee73 100644
> --- a/lib/librte_mbuf/Makefile
> +++ b/lib/librte_mbuf/Makefile
> @@ -17,8 +17,10 @@ LIBABIVER := 5
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> 
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
> index 6cc11ebb4..9137e8f26 100644
> --- a/lib/librte_mbuf/meson.build
> +++ b/lib/librte_mbuf/meson.build
> @@ -2,8 +2,10 @@
>  # Copyright(c) 2017 Intel Corporation
> 
>  version = 5
> -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
> -headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> +	'rte_mbuf_dyn.c')
> +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> +	'rte_mbuf_dyn.h')
>  deps += ['mempool']
> 
>  allow_experimental_apis = true
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 98225ec80..ef588cd54 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -198,9 +198,12 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> 
> -/* add new RX flags here */
> +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -/* add new TX flags here */
> +#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_LAST_FREE (1ULL << 39)
> +
> +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> 
>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,8 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
> 
> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
>  } __rte_cache_aligned;
> 
>  /**
> @@ -1685,6 +1690,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> 
>  /**
> + * Copy dynamic fields from m_src to m_dst.
> + *
> + * @param m_dst
> + *   The destination mbuf.
> + * @param m_src
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
> +{
> +	m_dst->dynfield1 = m_src->dynfield1;
> +	m_dst->dynfield2 = m_src->dynfield2;
> +}
> +
> +/**
>   * Attach packet mbuf to another packet mbuf.
>   *
>   * If the mbuf we are attaching to isn't a direct buffer and is attached to
> @@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
>  	mi->vlan_tci_outer = m->vlan_tci_outer;
>  	mi->tx_offload = m->tx_offload;
>  	mi->hash = m->hash;
> +	rte_mbuf_dynfield_copy(mi, m);
> 
>  	mi->next = NULL;
>  	mi->pkt_len = mi->data_len;
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> new file mode 100644
> index 000000000..6a96a43da
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -0,0 +1,373 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_tailq.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_string_fns.h>
> +#include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
> +
> +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> +
> +struct mbuf_dynfield {
> +	TAILQ_ENTRY(mbuf_dynfield) next;
> +	char name[RTE_MBUF_DYN_NAMESIZE];
> +	size_t size;
> +	size_t align;
> +	unsigned int flags;
> +	int offset;
> +};
> +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> +	.name = "RTE_MBUF_DYNFIELD",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> +
> +struct mbuf_dynflag {
> +	TAILQ_ENTRY(mbuf_dynflag) next;
> +	char name[RTE_MBUF_DYN_NAMESIZE];
> +	int bitnum;
> +};
> +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> +	.name = "RTE_MBUF_DYNFLAG",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> +
> +struct mbuf_dyn_shm {
> +	/** For each mbuf byte, free_space[i] == 1 if space is free. */
> +	uint8_t free_space[sizeof(struct rte_mbuf)];
> +	/** Bitfield of available flags. */
> +	uint64_t free_flags;
> +};
> +static struct mbuf_dyn_shm *shm;
> +
> +/* allocate and initialize the shared memory */
> +static int
> +init_shared_mem(void)
> +{
> +	const struct rte_memzone *mz;
> +	uint64_t mask;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> +						sizeof(struct mbuf_dyn_shm),
> +						SOCKET_ID_ANY, 0,
> +						RTE_CACHE_LINE_SIZE);
> +	} else {
> +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> +	}
> +	if (mz == NULL)
> +		return -1;
> +
> +	shm = mz->addr;
> +
> +#define mark_free(field)						\
> +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> +		0xff, sizeof(((struct rte_mbuf *)0)->field))
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		/* init free_space, keep it sync'd with
> +		 * rte_mbuf_dynfield_copy().
> +		 */
> +		memset(shm, 0, sizeof(*shm));
> +		mark_free(dynfield1);
> +		mark_free(dynfield2);
> +
> +		/* init free_flags */
> +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> +			shm->free_flags |= mask;
> +	}
> +#undef mark_free
> +
> +	return 0;
> +}
> +
> +/* check if this offset can be used */
> +static int
> +check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
> +{
> +	size_t i;
> +
> +	(void)flags;
> +
> +	if ((offset & (align - 1)) != 0)
> +		return -1;
> +	if (offset + size > sizeof(struct rte_mbuf))
> +		return -1;
> +
> +	for (i = 0; i < size; i++) {
> +		if (!shm->free_space[i + offset])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynfield *
> +__mbuf_dynfield_lookup(const char *name)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield *mbuf_dynfield;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		mbuf_dynfield = (struct mbuf_dynfield *)te->data;
> +		if (strncmp(name, mbuf_dynfield->name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynfield;
> +}
> +
> +int
> +rte_mbuf_dynfield_lookup(const char *name, size_t *size, size_t *align)
> +{
> +	struct mbuf_dynfield *mbuf_dynfield;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynfield == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (size != NULL)
> +		*size = mbuf_dynfield->size;
> +	if (align != NULL)
> +		*align = mbuf_dynfield->align;
> +
> +	return mbuf_dynfield->offset;
> +}
> +
> +int
> +rte_mbuf_dynfield_register(const char *name, size_t size, size_t align,
> +			unsigned int flags)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield *mbuf_dynfield = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	int offset, ret;
> +	size_t i;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		goto fail;
> +	if (size >= sizeof(struct rte_mbuf)) {
> +		rte_errno = EINVAL;
> +		goto fail;
> +	}
> +	if (!rte_is_power_of_2(align)) {
> +		rte_errno = EINVAL;
> +		goto fail;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	if (mbuf_dynfield != NULL) {
> +		if (mbuf_dynfield->size != size ||
> +				mbuf_dynfield->align != align ||
> +				mbuf_dynfield->flags != flags) {
> +			rte_errno = EEXIST;
> +			goto fail_unlock;
> +		}
> +		offset = mbuf_dynfield->offset;
> +		goto out_unlock;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		goto fail_unlock;
> +	}
> +
> +	for (offset = 0;
> +	     offset < (int)sizeof(struct rte_mbuf);
> +	     offset++) {
> +		if (check_offset(offset, size, align, flags) == 0)
> +			break;
> +	}
> +
> +	if (offset == sizeof(struct rte_mbuf)) {
> +		rte_errno = ENOENT;
> +		goto fail_unlock;
> +	}
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		goto fail_unlock;
> +
> +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
> +	if (mbuf_dynfield == NULL)
> +		goto fail_unlock;
> +
> +	ret = strlcpy(mbuf_dynfield->name, name, sizeof(mbuf_dynfield->name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->name)) {
> +		rte_errno = ENAMETOOLONG;
> +		goto fail_unlock;
> +	}
> +	mbuf_dynfield->size = size;
> +	mbuf_dynfield->align = align;
> +	mbuf_dynfield->flags = flags;
> +	mbuf_dynfield->offset = offset;
> +	te->data = mbuf_dynfield;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> +
> +	for (i = offset; i < offset + size; i++)
> +		shm->free_space[i] = 0;
> +
> +out_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return offset;
> +
> +fail_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +fail:
> +	rte_free(mbuf_dynfield);
> +	rte_free(te);
> +	return -1;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynflag *
> +__mbuf_dynflag_lookup(const char *name)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag *mbuf_dynflag;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		mbuf_dynflag = (struct mbuf_dynflag *)te->data;
> +		if (strncmp(name, mbuf_dynflag->name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynflag;
> +}
> +
> +int
> +rte_mbuf_dynflag_lookup(const char *name)
> +{
> +	struct mbuf_dynflag *mbuf_dynflag;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynflag == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	return mbuf_dynflag->bitnum;
> +}
> +
> +int
> +rte_mbuf_dynflag_register(const char *name)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag *mbuf_dynflag = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	int bitnum, ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		goto fail;
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	if (mbuf_dynflag != NULL) {
> +		bitnum = mbuf_dynflag->bitnum;
> +		goto out_unlock;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		goto fail_unlock;
> +	}
> +
> +	if (shm->free_flags == 0) {
> +		rte_errno = ENOENT;
> +		goto fail_unlock;
> +	}
> +	bitnum = rte_bsf64(shm->free_flags);
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		goto fail_unlock;
> +
> +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
> +	if (mbuf_dynflag == NULL)
> +		goto fail_unlock;
> +
> +	ret = strlcpy(mbuf_dynflag->name, name, sizeof(mbuf_dynflag->name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->name)) {
> +		rte_errno = ENAMETOOLONG;
> +		goto fail_unlock;
> +	}
> +	mbuf_dynflag->bitnum = bitnum;
> +	te->data = mbuf_dynflag;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> +
> +	shm->free_flags &= ~(1ULL << bitnum);
> +
> +out_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return bitnum;
> +
> +fail_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +fail:
> +	rte_free(mbuf_dynflag);
> +	rte_free(te);
> +	return -1;
> +}
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
> new file mode 100644
> index 000000000..a86986a0f
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> @@ -0,0 +1,119 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#ifndef _RTE_MBUF_DYN_H_
> +#define _RTE_MBUF_DYN_H_
> +
> +/**
> + * @file
> + * RTE Mbuf dynamic fields and flags
> + *
> + * Many features require to store data inside the mbuf. As the room in
> + * mbuf structure is limited, it is not possible to have a field for
> + * each feature. Also, changing fields in the mbuf structure can break
> + * the API or ABI.
> + *
> + * This module addresses this issue, by enabling the dynamic
> + * registration of fields or flags:
> + *
> + * - a dynamic field is a named area in the rte_mbuf structure, with a
> + *   given size (>= 1 byte) and alignment constraint.
> + * - a dynamic flag is a named bit in the rte_mbuf structure.
> + *
> + * The typical use case is a PMD that registers space for an offload
> + * feature, when the application requests to enable this feature.  As
> + * the space in mbuf is limited, the space should only be reserved if it
> + * is going to be used (i.e when the application explicitly asks for it).
> + *
> + * The registration can be done at any moment, but it is not possible
> + * to unregister fields or flags for now.
> + *
> + * Example of use:
> + *
> + * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file

Does this mean that all PMDs define their own 'RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN)'
here? In other words, can each PMD expose its private DYN_<feature> here for public
use?

How about adding another eth_dev_ops API definition to show, at run time, the feature
names, sizes and alignments that the PMD supports, for testpmd? And also another
eth_dev_ops API to show the data saved in rte_mbuf via 'dump_pkt_burst'? Adding a new
testpmd command to set the dynamic feature may also be good for PMD testing.

> + * - If the application asks for the feature, the PMD use

How does the application ask for the feature? By calling 'rte_mbuf_dynfield_register()'?

> + *   rte_mbuf_dynfield_register() to get the dynamic offset and stores
> + *   in a global variable.

If the PMD calls 'rte_mbuf_dynfield_register()' for 'dyn_feature' first, this means
that the PMD requests the dynamic feature itself, if I understand correctly. Should the
PMD instead call 'rte_mbuf_dynfield_lookup()' for 'dyn_feature' to check that the name
exists and that the size and align are as expected? If the name exists but the size and
align are not right (perhaps because the PMD changed its definition), then the PMD can
give a warning or error message. If the name exists and both size and align are as
expected, then the PMD can assume that the application requested the right dynamic feature.

> + * - The application also calls rte_mbuf_dynfield_register() to get the
> + *   dynamic offset and stores it in a global variable.
> + * - When the field must be used by the PMD or the application, they
> + *   use the RTE_MBUF_DYNFIELD() helper.
> + */
> +
> +struct rte_mbuf;
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure.
> + *
> + * @param name
> + *   A string identifying the dynamic field. External applications or
> + *   libraries must not define identifers prefixed with "rte_", which
> + *   are reserved for standard features.
> + * @param size
> + *   The number of bytes to reserve.
> + * @param align
> + *   The alignment constraint, which must be a power of 2.
> + * @param flags
> + *   Reserved for future use.
> + * @return
> + *   The offset in the mbuf structure, or -1 on error (rte_errno is set).
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register(const char *name, size_t size, size_t align,
> +			unsigned int flags);
> +
> +/**
> + * Lookup for a registered dynamic mbuf field.
> + *
> + * @param name
> + *   A string identifying the dynamic field.
> + * @param size
> + *   If not NULL, the number of reserved bytes for this field is stored
> + *   at this address.
> + * @param align
> + *   If not NULL, the alignement constraint for this field is stored
> + *   at this address.
> + * @return
> + *   The offset of this field in the mbuf structure, or -1 on error
> + *   (rte_errno is set).
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_lookup(const char *name, size_t *size, size_t *align);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure.
> + *
> + * @param name
> + *   A string identifying the dynamic flag. External applications or
> + *   libraries must not define identifers prefixed with "rte_", which
> + *   are reserved for standard features.
> + * @return
> + *   The number of the reserved bit, or -1 on error (rte_errno is set).
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register(const char *name);
> +
> +/**
> + * Lookup for a registered dynamic mbuf flag.
> + *
> + * @param name
> + *   A string identifying the dynamic flag.
> + * @return
> + *   The offset of this flag in the mbuf structure, or -1 on error
> + *   (rte_errno is set).
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_lookup(const char *name);
> +
> +/**
> + * Helper macro to access to a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((char *)(m) + (offset)))
> +
> +/**
> + * Maximum length of the dynamic field or flag string.
> + */
> +#define RTE_MBUF_DYN_NAMESIZE 32
> +
> +#endif
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
> index 2662a37bf..a98310570 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -50,4 +50,8 @@ EXPERIMENTAL {
>  	global:
> 
>  	rte_mbuf_check;
> +	rte_mbuf_dynfield_lookup;
> +	rte_mbuf_dynfield_register;
> +	rte_mbuf_dynflag_lookup;
> +	rte_mbuf_dynflag_register;
>  } DPDK_18.08;
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
  2019-07-10 17:14 ` Wang, Haiyue
@ 2019-07-10 17:49 ` Stephen Hemminger
  2019-07-10 18:12   ` Wiles, Keith
  2019-07-11  7:36   ` Olivier Matz
  2019-07-11  9:24 ` Thomas Monjalon
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 64+ messages in thread
From: Stephen Hemminger @ 2019-07-10 17:49 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

On Wed, 10 Jul 2019 11:29:07 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,8 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
>  
> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
>  } __rte_cache_aligned;

Growing mbuf is a fundamental ABI break and this needs
higher level approval.  Why not one pointer?

It looks like you are creating something like FreeBSD m_tag.
Why not use that?

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10 17:49 ` Stephen Hemminger
@ 2019-07-10 18:12   ` Wiles, Keith
  2019-07-11  7:53     ` Olivier Matz
  2019-07-11  7:36   ` Olivier Matz
  1 sibling, 1 reply; 64+ messages in thread
From: Wiles, Keith @ 2019-07-10 18:12 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dpdk dev community, Stephen Hemminger



> On Jul 10, 2019, at 12:49 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
> On Wed, 10 Jul 2019 11:29:07 +0200
> Olivier Matz <olivier.matz@6wind.com> wrote:
> 
>> /**
>>  * Indicate that the metadata field in the mbuf is in use.
>> @@ -738,6 +741,8 @@ struct rte_mbuf {
>> 	 */
>> 	struct rte_mbuf_ext_shared_info *shinfo;
>> 
>> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
>> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
>> } __rte_cache_aligned;
> 
> Growing mbuf is a fundamental ABI break and this needs
> higher level approval.  Why not one pointer?
> 
> It looks like you are creating something like FreeBSD m_tag.
> Why not use that?

Changing the mbuf structure causes a big problem for a number of reasons, as Stephen states.

What if we leave the mbuf structure alone and add this feature to the headroom space between the mbuf structure and the packet? When setting up the mempool/mbuf pool, we would define a headroom to hold the extra data when the mbuf pool is created, or just use the current headroom space. Using this method we can eliminate the mbuf structure change and add the data to the packet buffer. We can do away with dynfield1 and 2 as we know where headroom space begins and ends. Just a thought.

Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10 17:14 ` Wang, Haiyue
@ 2019-07-11  7:26   ` Olivier Matz
  2019-07-11  8:04     ` Wang, Haiyue
  2019-07-11 15:31     ` Stephen Hemminger
  0 siblings, 2 replies; 64+ messages in thread
From: Olivier Matz @ 2019-07-11  7:26 UTC (permalink / raw)
  To: Wang, Haiyue; +Cc: dev

Hi,

On Wed, Jul 10, 2019 at 05:14:33PM +0000, Wang, Haiyue wrote:
> Hi,
> 
> Sounds cool, just have some questions inline.
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > Sent: Wednesday, July 10, 2019 17:29
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>

(...)

> > +/**
> > + * @file
> > + * RTE Mbuf dynamic fields and flags
> > + *
> > + * Many features require to store data inside the mbuf. As the room in
> > + * mbuf structure is limited, it is not possible to have a field for
> > + * each feature. Also, changing fields in the mbuf structure can break
> > + * the API or ABI.
> > + *
> > + * This module addresses this issue, by enabling the dynamic
> > + * registration of fields or flags:
> > + *
> > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > + *   given size (>= 1 byte) and alignment constraint.
> > + * - a dynamic flag is a named bit in the rte_mbuf structure.
> > + *
> > + * The typical use case is a PMD that registers space for an offload
> > + * feature, when the application requests to enable this feature.  As
> > + * the space in mbuf is limited, the space should only be reserved if it
> > + * is going to be used (i.e when the application explicitly asks for it).
> > + *
> > + * The registration can be done at any moment, but it is not possible
> > + * to unregister fields or flags for now.
> > + *
> > + * Example of use:
> > + *
> > + * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file
> 
> Does it means that all PMDs define their own 'RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN)'
> here ? In other words, each PMD can expose its private DYN_<feature> here for public
> using ?

For generic fields, I think they should be declared in this file. For
instance, if we decide to replace the current m->timestamp field by a
dynamic field, we should add like this:

#define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
#define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
#define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)

If the feature is PMD-specific, the defines could be exposed in a
PMD header.

> How about adding another eth_dev_ops API definitions to show the PMD's supporting feature
> names, sizes, align in run time for testpmd ? And also another eth_dev_ops API for showing
> the data saved in rte_mbuf by 'dump_pkt_burst' ? Adding a new command for testpmd to set
> the dynamic feature may be good for PMD test.
> 
> > + * - If the application asks for the feature, the PMD use
> 
> How does the application ask for the feature ? By ' rte_mbuf_dynfield_register()' ?

No change in this area. If we take again the timestamp example, the
feature is asked by the application through the ethdev layer by passing
DEV_RX_OFFLOAD_TIMESTAMP to port or queue configuration.

> 
> > + *   rte_mbuf_dynfield_register() to get the dynamic offset and stores
> > + *   in a global variable.
> 
> In case, the PMD calls 'rte_mbuf_dynfield_register()' for 'dyn_feature' firstly, this
> means that PMD requests the dynamic feature itself if I understand correctly. Should
> PMD calls 'rte_mbuf_dynfield_lookup' for 'dyn_feature' to query the name exists, the
> size and align are right as expected ? If exists, but size and align are not right, may
> be for PMD change its definition, then PMD can give a warning or error message. If name
> exists, both size and align are expected, then PMD think that the application request
> the right dynamic features.

The PMD should only call rte_mbuf_dynfield_register() if the application
requests the feature (through ethdev, or through some other means if it's a
PMD-specific feature). The goal is to only reserve the area in the mbuf
for features that are actually needed.

Hope this is clearer now. I think I need to enhance the documentation in
next version ;)

Thanks for the feedback.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10 17:49 ` Stephen Hemminger
  2019-07-10 18:12   ` Wiles, Keith
@ 2019-07-11  7:36   ` Olivier Matz
  2019-07-12 12:23     ` Jerin Jacob Kollanukkaran
  1 sibling, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-07-11  7:36 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

On Wed, Jul 10, 2019 at 10:49:17AM -0700, Stephen Hemminger wrote:
> On Wed, 10 Jul 2019 11:29:07 +0200
> Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> >  /**
> >   * Indicate that the metadata field in the mbuf is in use.
> > @@ -738,6 +741,8 @@ struct rte_mbuf {
> >  	 */
> >  	struct rte_mbuf_ext_shared_info *shinfo;
> >  
> > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> >  } __rte_cache_aligned;
> 
> Growing mbuf is a fundamental ABI break and this needs
> higher level approval.

The size of the mbuf is still 128, I used the last 16 bytes that
were unused.

Later, we can think about removing existing fields and replace
them by a dynfield area, which can be anywhere in the structure
(even if it is in a 1 byte hole).

>  Why not one pointer?

A pointer to what?

> It looks like you are creating something like FreeBSD m_tag.
> Why not use that?

My implementation targets performance (accessing *(mbuf + offset)
should be nearly as fast as accessing a static field), at the price
of less flexibility compared to something like FreeBSD m_tag.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10 18:12   ` Wiles, Keith
@ 2019-07-11  7:53     ` Olivier Matz
  2019-07-11 14:37       ` Wiles, Keith
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-07-11  7:53 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dpdk dev community, Stephen Hemminger

Hi Keith,

On Wed, Jul 10, 2019 at 06:12:16PM +0000, Wiles, Keith wrote:
> 
> 
> > On Jul 10, 2019, at 12:49 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> > 
> > On Wed, 10 Jul 2019 11:29:07 +0200
> > Olivier Matz <olivier.matz@6wind.com> wrote:
> > 
> >> /**
> >>  * Indicate that the metadata field in the mbuf is in use.
> >> @@ -738,6 +741,8 @@ struct rte_mbuf {
> >> 	 */
> >> 	struct rte_mbuf_ext_shared_info *shinfo;
> >> 
> >> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> >> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> >> } __rte_cache_aligned;
> > 
> > Growing mbuf is a fundamental ABI break and this needs
> > higher level approval.  Why not one pointer?
> > 
> > It looks like you are creating something like FreeBSD m_tag.
> > Why not use that?
> 
> Changing the mbuf structure causes a big problem for a number reasons as Stephen states.

Can you elaborate?

This is indeed an ABI break, but I think this is only due to the addition
of rte_mbuf_dynfield_copy() to rte_pktmbuf_attach(). The size of the
mbuf does not change and the fields are not initialized when creating a
new mbuf. So I think there is no ABI change for code that is not using
rte_pktmbuf_attach().

I don't think it's a problem to have one ABI change, if it avoids many
others in the future.

> If we leave the mbuf stucture alone and add this feature to the
> headroom space between the mbuf structure and the packet. When setting
> up the mempool/mbuf pool we define a headroom to hold the extra data
> when the mbuf pool is created or just use the current headroom
> space. Using this method we can eliminate the mbuf structure change
> and add the data to the packet buffer. We can do away with dynfield1
> and 2 as we know where headroom space begins and ends. Just a thought.

The size of the mbuf metadata (between the mbuf structure and the
buffer) is configured per pool, so it can be different across
mbufs. So, the access to the dynamic field would be slower:
*(mbuf + dynfield_offset + metadata_size(mbuf))

Also, the size of the data buffer can be 0: it happens for mbuf pools
that are dedicated to mbuf clones (that reference data in another mbuf
or in an external buffer). In this case, there is no room after metadata
to store the dynamic fields.

Thanks,
Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  7:26   ` Olivier Matz
@ 2019-07-11  8:04     ` Wang, Haiyue
  2019-07-11  8:20       ` Olivier Matz
  2019-07-11 15:31     ` Stephen Hemminger
  1 sibling, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-07-11  8:04 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Thursday, July 11, 2019 15:26
> To: Wang, Haiyue <haiyue.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> 
> Hi,
> 
> On Wed, Jul 10, 2019 at 05:14:33PM +0000, Wang, Haiyue wrote:
> > Hi,
> >
> > Sounds cool, just have some questions inline.
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > Sent: Wednesday, July 10, 2019 17:29
> > > To: dev@dpdk.org
> > > Subject: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > >
> > > Many features require to store data inside the mbuf. As the room in mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> 
> (...)
> 
> > > +/**
> > > + * @file
> > > + * RTE Mbuf dynamic fields and flags
> > > + *
> > > + * Many features require to store data inside the mbuf. As the room in
> > > + * mbuf structure is limited, it is not possible to have a field for
> > > + * each feature. Also, changing fields in the mbuf structure can break
> > > + * the API or ABI.
> > > + *
> > > + * This module addresses this issue, by enabling the dynamic
> > > + * registration of fields or flags:
> > > + *
> > > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > > + *   given size (>= 1 byte) and alignment constraint.
> > > + * - a dynamic flag is a named bit in the rte_mbuf structure.
> > > + *
> > > + * The typical use case is a PMD that registers space for an offload
> > > + * feature, when the application requests to enable this feature.  As
> > > + * the space in mbuf is limited, the space should only be reserved if it
> > > + * is going to be used (i.e when the application explicitly asks for it).
> > > + *
> > > + * The registration can be done at any moment, but it is not possible
> > > + * to unregister fields or flags for now.
> > > + *
> > > + * Example of use:
> > > + *
> > > + * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file
> >
> > Does it means that all PMDs define their own 'RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN)'
> > here ? In other words, each PMD can expose its private DYN_<feature> here for public
> > using ?
> 
> For generic fields, I think they should be declared in this file. For
> instance, if we decide to replace the current m->timestamp field by a
> dynamic field, we should add like this:
> 
> #define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
> #define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
> #define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)
> 
> If the feature is PMD-specific, the defines could be exposed in a
> PMD header.
> 

Now I understand this comment a little better: "... must not define identifiers prefixed
with "rte_", which are reserved for standard features". It seems there is a big plan?

> > How about adding another eth_dev_ops API definitions to show the PMD's supporting feature
> > names, sizes, align in run time for testpmd ? And also another eth_dev_ops API for showing
> > the data saved in rte_mbuf by 'dump_pkt_burst' ? Adding a new command for testpmd to set
> > the dynamic feature may be good for PMD test.
> >
> > > + * - If the application asks for the feature, the PMD use
> >
> > How does the application ask for the feature ? By ' rte_mbuf_dynfield_register()' ?
> 
> No change in this area. If we take again the timestamp example, the
> feature is asked by the application through the ethdev layer by passing
> DEV_RX_OFFLOAD_TIMESTAMP to port or queue configuration.
> 
> >
> > > + *   rte_mbuf_dynfield_register() to get the dynamic offset and stores
> > > + *   in a global variable.
> >
> > In case, the PMD calls 'rte_mbuf_dynfield_register()' for 'dyn_feature' firstly, this
> > means that PMD requests the dynamic feature itself if I understand correctly. Should
> > PMD calls 'rte_mbuf_dynfield_lookup' for 'dyn_feature' to query the name exists, the
> > size and align are right as expected ? If exists, but size and align are not right, may
> > be for PMD change its definition, then PMD can give a warning or error message. If name
> > exists, both size and align are expected, then PMD think that the application request
> > the right dynamic features.
> 
> The PMD should only call rte_mbuf_dynfield_register() if the application
> requests the feature (through ethdev, or through another mean if it's a
> PMD-specific feature). The goal is to only reserve the area in the mbuf
> for features that are actually needed.
> 
> Hope this is clearer now. I think I need to enhance the documentation in
> next version ;)
> 

Clearer now. More test code would also help for a full understanding, thanks! :)

> Thanks for the feedback.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  8:04     ` Wang, Haiyue
@ 2019-07-11  8:20       ` Olivier Matz
  2019-07-11  8:34         ` Wang, Haiyue
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-07-11  8:20 UTC (permalink / raw)
  To: Wang, Haiyue; +Cc: dev

On Thu, Jul 11, 2019 at 08:04:00AM +0000, Wang, Haiyue wrote:
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Thursday, July 11, 2019 15:26
> > To: Wang, Haiyue <haiyue.wang@intel.com>
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > 
> > Hi,
> > 
> > On Wed, Jul 10, 2019 at 05:14:33PM +0000, Wang, Haiyue wrote:
> > > Hi,
> > >
> > > Sounds cool, just have some questions inline.
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > > Sent: Wednesday, July 10, 2019 17:29
> > > > To: dev@dpdk.org
> > > > Subject: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > > >
> > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > structure is limited, it is not possible to have a field for each
> > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > or ABI.
> > > >
> > > > This commit addresses these issues, by enabling the dynamic registration
> > > > of fields or flags:
> > > >
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > >   given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > >
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if it
> > > > is going to be used (i.e when the application explicitly asks for it).
> > > >
> > > > The registration can be done at any moment, but it is not possible
> > > > to unregister fields or flags for now.
> > > >
> > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > 
> > (...)
> > 
> > > > +/**
> > > > + * @file
> > > > + * RTE Mbuf dynamic fields and flags
> > > > + *
> > > > + * Many features require to store data inside the mbuf. As the room in
> > > > + * mbuf structure is limited, it is not possible to have a field for
> > > > + * each feature. Also, changing fields in the mbuf structure can break
> > > > + * the API or ABI.
> > > > + *
> > > > + * This module addresses this issue, by enabling the dynamic
> > > > + * registration of fields or flags:
> > > > + *
> > > > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > + *   given size (>= 1 byte) and alignment constraint.
> > > > + * - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > + *
> > > > + * The typical use case is a PMD that registers space for an offload
> > > > + * feature, when the application requests to enable this feature.  As
> > > > + * the space in mbuf is limited, the space should only be reserved if it
> > > > + * is going to be used (i.e when the application explicitly asks for it).
> > > > + *
> > > > + * The registration can be done at any moment, but it is not possible
> > > > + * to unregister fields or flags for now.
> > > > + *
> > > > + * Example of use:
> > > > + *
> > > > + * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file
> > >
> > > Does it means that all PMDs define their own 'RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN)'
> > > here ? In other words, each PMD can expose its private DYN_<feature> here for public
> > > using ?
> > 
> > For generic fields, I think they should be declared in this file. For
> > instance, if we decide to replace the current m->timestamp field by a
> > dynamic field, we should add like this:
> > 
> > #define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
> > #define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
> > #define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)
> > 
> > If the feature is PMD-specific, the defines could be exposed in a
> > PMD header.
> > 
> 
> Now, understand the comments a little : ... must not define identifiers prefixed with "rte_",
> which are reserved for standard features. Seems have big plan ?

The dynamic field can also be used by an external application or by an
external library. For instance, a field to tag a packet, like skb->mark
in linux. In this case, id, size and alignment would be defined outside
dpdk subtree.

To avoid name conflicts, I think we should define a convention for
identifiers, so they are in different namespaces:

- "rte_*" for identifiers declared inside dpdk subtree
- any other name for identifiers declared in an external application or
  library

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  8:20       ` Olivier Matz
@ 2019-07-11  8:34         ` Wang, Haiyue
  0 siblings, 0 replies; 64+ messages in thread
From: Wang, Haiyue @ 2019-07-11  8:34 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Thursday, July 11, 2019 16:21
> To: Wang, Haiyue <haiyue.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> 
> On Thu, Jul 11, 2019 at 08:04:00AM +0000, Wang, Haiyue wrote:
> > > -----Original Message-----
> > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > Sent: Thursday, July 11, 2019 15:26
> > > To: Wang, Haiyue <haiyue.wang@intel.com>
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > >
> > > Hi,
> > >
> > > On Wed, Jul 10, 2019 at 05:14:33PM +0000, Wang, Haiyue wrote:
> > > > Hi,
> > > >
> > > > Sounds cool, just have some questions inline.
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > > > Sent: Wednesday, July 10, 2019 17:29
> > > > > To: dev@dpdk.org
> > > > > Subject: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > > > >
> > > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > > structure is limited, it is not possible to have a field for each
> > > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > > or ABI.
> > > > >
> > > > > This commit addresses these issues, by enabling the dynamic registration
> > > > > of fields or flags:
> > > > >
> > > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > >   given size (>= 1 byte) and alignment constraint.
> > > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > >
> > > > > The typical use case is a PMD that registers space for an offload
> > > > > feature, when the application requests to enable this feature.  As
> > > > > the space in mbuf is limited, the space should only be reserved if it
> > > > > is going to be used (i.e when the application explicitly asks for it).
> > > > >
> > > > > The registration can be done at any moment, but it is not possible
> > > > > to unregister fields or flags for now.
> > > > >
> > > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > >
> > > (...)
> > >
> > > > > +/**
> > > > > + * @file
> > > > > + * RTE Mbuf dynamic fields and flags
> > > > > + *
> > > > > + * Many features require to store data inside the mbuf. As the room in
> > > > > + * mbuf structure is limited, it is not possible to have a field for
> > > > > + * each feature. Also, changing fields in the mbuf structure can break
> > > > > + * the API or ABI.
> > > > > + *
> > > > > + * This module addresses this issue, by enabling the dynamic
> > > > > + * registration of fields or flags:
> > > > > + *
> > > > > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > > + *   given size (>= 1 byte) and alignment constraint.
> > > > > + * - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > > + *
> > > > > + * The typical use case is a PMD that registers space for an offload
> > > > > + * feature, when the application requests to enable this feature.  As
> > > > > + * the space in mbuf is limited, the space should only be reserved if it
> > > > > + * is going to be used (i.e when the application explicitly asks for it).
> > > > > + *
> > > > > + * The registration can be done at any moment, but it is not possible
> > > > > + * to unregister fields or flags for now.
> > > > > + *
> > > > > + * Example of use:
> > > > > + *
> > > > > + * - RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN) are defined in this file
> > > >
> > > > Does it means that all PMDs define their own 'RTE_MBUF_DYN_<feature>_(ID|SIZE|ALIGN)'
> > > > here ? In other words, each PMD can expose its private DYN_<feature> here for public
> > > > using ?
> > >
> > > For generic fields, I think they should be declared in this file. For
> > > instance, if we decide to replace the current m->timestamp field by a
> > > dynamic field, we should add like this:
> > >
> > > #define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
> > > #define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
> > > #define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)
> > >
> > > If the feature is PMD-specific, the defines could be exposed in a
> > > PMD header.
> > >
> >
> > Now, understand the comments a little : ... must not define identifiers prefixed with "rte_",
> > which are reserved for standard features. Seems have big plan ?
> 
> The dynamic field can also be used by an external application or by an
> external library. For instance, a field to tag a packet, like skb->mark
> in linux. In this case, id, size and alignment would be defined outside
> dpdk subtree.
> 
> To avoid name conflicts, I think we should define a convention for
> identifiers, so they are in different namespaces:
> 
> - "rte_*" for identifiers declared inside dpdk subtree
> - any other name for identifiers declared in an external application or
>   library

Much clearer now, thanks; this convention could go in the programming guide document. :)

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
  2019-07-10 17:14 ` Wang, Haiyue
  2019-07-10 17:49 ` Stephen Hemminger
@ 2019-07-11  9:24 ` Thomas Monjalon
  2019-07-12 14:54 ` Andrew Rybchenko
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 64+ messages in thread
From: Thomas Monjalon @ 2019-07-11  9:24 UTC (permalink / raw)
  To: dev; +Cc: Olivier Matz

10/07/2019 11:29, Olivier Matz:
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>

I fully support this solution.
It will give a lot of space for new features and will solve
the ABI stability problem.

Next step, I would like to move some existing mbuf fields to this
dynamic model. It will increase the free space in mbuf to be used
by dynamic fields. By converting some fields which are currently
union'ed, we can also fix the issue of these features being exclusive.

Acked-by: Thomas Monjalon <thomas@monjalon.net>



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  7:53     ` Olivier Matz
@ 2019-07-11 14:37       ` Wiles, Keith
  2019-07-12  9:06         ` Olivier Matz
  0 siblings, 1 reply; 64+ messages in thread
From: Wiles, Keith @ 2019-07-11 14:37 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dpdk dev community, Stephen Hemminger



> On Jul 11, 2019, at 2:53 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Hi Keith,
> 
> On Wed, Jul 10, 2019 at 06:12:16PM +0000, Wiles, Keith wrote:
>> 
>> 
>>> On Jul 10, 2019, at 12:49 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
>>> 
>>> On Wed, 10 Jul 2019 11:29:07 +0200
>>> Olivier Matz <olivier.matz@6wind.com> wrote:
>>> 
>>>> /**
>>>> * Indicate that the metadata field in the mbuf is in use.
>>>> @@ -738,6 +741,8 @@ struct rte_mbuf {
>>>> 	 */
>>>> 	struct rte_mbuf_ext_shared_info *shinfo;
>>>> 
>>>> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
>>>> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
>>>> } __rte_cache_aligned;
>>> 
>>> Growing mbuf is a fundamental ABI break and this needs
>>> higher level approval.  Why not one pointer?
>>> 
>>> It looks like you are creating something like FreeBSD m_tag.
>>> Why not use that?
>> 
>> Changing the mbuf structure causes a big problem for a number of reasons as Stephen states.
> 
> Can you elaborate?
> 
> This is indeed an ABI break, but I think this is only due to the adding
> of rte_mbuf_dynfield_copy() in rte_pktmbuf_attach(). The size of the
> mbuf does not change and the fields are not initialized when creating a
> new mbuf. So I think there is no ABI change for code that is not using
> rte_pktmbuf_attach().
> 
> I don't think it's a problem to have one ABI change, if it avoids many
> others in the future.
> 
>> If we leave the mbuf structure alone and add this feature to the
>> headroom space between the mbuf structure and the packet. When setting
>> up the mempool/mbuf pool we define a headroom to hold the extra data
>> when the mbuf pool is created or just use the current headroom
>> space. Using this method we can eliminate the mbuf structure change
>> and add the data to the packet buffer. We can do away with dynfield1
>> and 2 as we know where headroom space begins and ends. Just a thought.
> 
> The size of the mbuf metadata (between the mbuf structure and the
> buffer) is configured per pool, so it can be different across
> mbufs. So, the access to the dynamic field would be slower:
> *(mbuf + dynfield_offset + metadata_size(mbuf))

We can force that space to be a minimum size when the mempool is created in the case of a cloned mbuf. The cloned mbuf is a small use case, but an important one, and increasing the size for those special mbufs by a cache line should not be a huge problem.

I think most allocations do not change the size from the default value of the headroom (128). The mbuf + buffer are normally rounded to 2K or a bit bigger, which gives a bit more space in those cases of a packet size of 1518-1522. Jumbo frames are the same. Using the headroom size for an application needs to be defined and setup for the max size anyway for the application needs, so normally all mbuf creates should contain the same size to account for mbuf moments within the system.

That is my $0.02.

> 
> Also, the size of the data buffer can be 0: it happens for mbuf pools
> that are dedicated to mbuf clones (that reference data in another mbuf
> or in an external buffer). In this case, there is no room after metadata
> to store the dynamic fields.
> 
> Thanks,
> Olivier

Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  7:26   ` Olivier Matz
  2019-07-11  8:04     ` Wang, Haiyue
@ 2019-07-11 15:31     ` Stephen Hemminger
  2019-07-12  9:18       ` Olivier Matz
  1 sibling, 1 reply; 64+ messages in thread
From: Stephen Hemminger @ 2019-07-11 15:31 UTC (permalink / raw)
  To: Olivier Matz; +Cc: Wang, Haiyue, dev

On Thu, 11 Jul 2019 09:26:19 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

> For generic fields, I think they should be declared in this file. For
> instance, if we decide to replace the current m->timestamp field by a
> dynamic field, we should add like this:
> 
> #define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
> #define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
> #define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)


Let's use structures (like rte_flow) rather than macros for
this?

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11 14:37       ` Wiles, Keith
@ 2019-07-12  9:06         ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-07-12  9:06 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dpdk dev community, Stephen Hemminger

Hi,

On Thu, Jul 11, 2019 at 02:37:23PM +0000, Wiles, Keith wrote:
> 
> 
> > On Jul 11, 2019, at 2:53 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> > 
> > Hi Keith,
> > 
> > On Wed, Jul 10, 2019 at 06:12:16PM +0000, Wiles, Keith wrote:
> >> 
> >> 
> >>> On Jul 10, 2019, at 12:49 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> >>> 
> >>> On Wed, 10 Jul 2019 11:29:07 +0200
> >>> Olivier Matz <olivier.matz@6wind.com> wrote:
> >>> 
> >>>> /**
> >>>> * Indicate that the metadata field in the mbuf is in use.
> >>>> @@ -738,6 +741,8 @@ struct rte_mbuf {
> >>>> 	 */
> >>>> 	struct rte_mbuf_ext_shared_info *shinfo;
> >>>> 
> >>>> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> >>>> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> >>>> } __rte_cache_aligned;
> >>> 
> >>> Growing mbuf is a fundamental ABI break and this needs
> >>> higher level approval.  Why not one pointer?
> >>> 
> >>> It looks like you are creating something like FreeBSD m_tag.
> >>> Why not use that?
> >> 
> >> Changing the mbuf structure causes a big problem for a number of reasons as Stephen states.
> > 
> > Can you elaborate?
> > 
> > This is indeed an ABI break, but I think this is only due to the adding
> > of rte_mbuf_dynfield_copy() in rte_pktmbuf_attach(). The size of the
> > mbuf does not change and the fields are not initialized when creating a
> > new mbuf. So I think there is no ABI change for code that is not using
> > rte_pktmbuf_attach().
> > 
> > I don't think it's a problem to have one ABI change, if it avoids many
> > others in the future.
> > 
> >> If we leave the mbuf structure alone and add this feature to the
> >> headroom space between the mbuf structure and the packet. When setting
> >> up the mempool/mbuf pool we define a headroom to hold the extra data
> >> when the mbuf pool is created or just use the current headroom
> >> space. Using this method we can eliminate the mbuf structure change
> >> and add the data to the packet buffer. We can do away with dynfield1
> >> and 2 as we know where headroom space begins and ends. Just a thought.
> > 
> > The size of the mbuf metadata (between the mbuf structure and the
> > buffer) is configured per pool, so it can be different across
> > mbufs. So, the access to the dynamic field would be slower:
> > *(mbuf + dynfield_offset + metadata_size(mbuf))
> 

> We can force that space to be a minimum size when the mempool is
> created in the case of a cloned mbuf. The cloned mbuf is a small use
> case, but am important one and increasing the size for those special
> mbufs by a cache line should not be a huge problem.
> 
> I think most allocations do not change the size from the default value
> of the headroom (128). The mbuf + buffer are normally rounded to 2K or
> a bit bigger, which gives a bit more space in those cases of a packet
> size of 1518-1522. Jumbo frames are the same. Using the headroom size
> for an application needs to be defined and setup for the max size
> anyway for the application needs, so normally all mbuf creates should
> contain the same size to account for mbuf moments within the system.

If we want more room for dynamic fields, we can do something like
this. But I don't think this is something that will happen soon: we
already have 16 bytes available, and I'm sure we can get another 16
bytes very easily by just converting fields like timestamp or sequence
numbers.

To attach a larger amount of data to mbufs, the metadata feature still
exists. We could imagine extending the dynamic fields feature to be able
to use more space after the mbuf structure (in metadata?), but it is
more complex.

I don't think that using headroom or tailroom is a good idea. It's
true that mbufs are usually a bit more than 2K, and some space is lost
when the MTU is 1500. But smaller mbufs are perfectly legal too, except that
some drivers do not support them. Anyway, headroom and tailroom must be
used for what they are designed for: reserving room to prepend or append
data. If we want more space for dynamic fields, let's add a specific
location for it when it is needed.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11 15:31     ` Stephen Hemminger
@ 2019-07-12  9:18       ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-07-12  9:18 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Wang, Haiyue, dev

On Thu, Jul 11, 2019 at 08:31:19AM -0700, Stephen Hemminger wrote:
> On Thu, 11 Jul 2019 09:26:19 +0200
> Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> > For generic fields, I think they should be declared in this file. For
> > instance, if we decide to replace the current m->timestamp field by a
> > dynamic field, we should add like this:
> > 
> > #define RTE_MBUF_DYN_TIMESTAMP_ID "rte_timestamp"
> > #define RTE_MBUF_DYN_TIMESTAMP_SIZE sizeof(uint64_t)
> > #define RTE_MBUF_DYN_TIMESTAMP_ALIGN __alignof__(uint64_t)
> 
> 
> Let's use  structures (like rte_flow) rather that macros for
> this?

The purpose of having defines is:
- to avoid typos when registering dynamic fields/flags
- to avoid name conflicts (because define names are derived from identifier)
- associate a known size and alignment to a given dynamic field

Using strings instead of numeric identifiers is also done on
purpose, to facilitate the definition of unique identifiers outside
the dpdk subtree (as long as we respect the constraints on namespaces).

Instead of defines, are you suggesting something like this?

	struct rte_mbuf_dynfield {
		const char *id;
		size_t size;
		size_t align;
	};

	/* definition of a dynamic field */
	static const struct rte_mbuf_dynfield rte_mbuf_dynfield_timestamp = {
		.id = "rte_mbuf_dynfield_timestamp",
		.size = sizeof(uint64_t),
		.align = __alignof__(uint64_t),
	};

	/* ...and same for dynamic flags... */

And change the registration API to have one argument of type (struct
rte_mbuf_dynfield *) ?

I agree it could be quite nice with structs.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-11  7:36   ` Olivier Matz
@ 2019-07-12 12:23     ` Jerin Jacob Kollanukkaran
  2019-07-16  9:39       ` Olivier Matz
  0 siblings, 1 reply; 64+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-07-12 12:23 UTC (permalink / raw)
  To: Olivier Matz, Stephen Hemminger; +Cc: dev

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Olivier Matz
> Sent: Thursday, July 11, 2019 1:07 PM
> To: Stephen Hemminger <stephen@networkplumber.org>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> 
> On Wed, Jul 10, 2019 at 10:49:17AM -0700, Stephen Hemminger wrote:
> > On Wed, 10 Jul 2019 11:29:07 +0200
> > Olivier Matz <olivier.matz@6wind.com> wrote:
> >
> > >  /**
> > >   * Indicate that the metadata field in the mbuf is in use.
> > > @@ -738,6 +741,8 @@ struct rte_mbuf {
> > >  	 */
> > >  	struct rte_mbuf_ext_shared_info *shinfo;
> > >
> > > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */

Since the mbuf size is fixed, what is the downside of the union scheme[1] vs the upside of the proposed scheme?

[1] Example like:
        RTE_STD_C11
        union {
                void *userdata;   /**< Can be used for external metadata */
                uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
        };

# The fields like mbuf: hash.usr are used in a variety of use cases together,
e.g. libraries like distributor() and Eventdev are using it. If we switch
to the dynamic mbuf scheme, will we take those fields using rte_mbuf_dynfield_register()
on library init?

# I see an upside of dynamic mbuf if we can add an rte_mbuf_dynfield_unregister API.
But can we ever do that? Because it will be complex if we need to introduce a notification mechanism etc.

# In the real world use case, with the union scheme, the fastpath API can simply dereference
a specific element (say mbuf->fieldx). With the dynamic scheme, the offset needs to be stored
in some other data structure and dereferenced in the fastpath before accessing the field of interest.
Right?




^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
                   ` (2 preceding siblings ...)
  2019-07-11  9:24 ` Thomas Monjalon
@ 2019-07-12 14:54 ` Andrew Rybchenko
  2019-07-16  9:49   ` Olivier Matz
  2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 64+ messages in thread
From: Andrew Rybchenko @ 2019-07-12 14:54 UTC (permalink / raw)
  To: Olivier Matz, dev

On 10.07.2019 12:29, Olivier Matz wrote:
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
>
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
>
> - a dynamic field is a named area in the rte_mbuf structure, with a
>    given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
>
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
>
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>

I like the idea.

I think it would be very useful to measure the performance impact. Since it is
a core structure which is heavily used on the datapath, the performance impact is
required to make the decision to go or not to go. If acceptable, more fields
can be converted to dynamic: timestamp, user data, sequence number,
timesync data etc. Rules on which fields should be static and which
dynamic are required. Timestamp, for example, is located in the first
cache line. Do we need a way to prioritize some dynamic fields to be located
(if possible) in the first cache line? Or is it better to simply move some
static fields to the first cache line instead?

I think rules should be better defined and imposed, if possible, when
dynamic fields may be registered. Which entities are allowed to register
dynamic fields? Do we need to keep track of which entity has registered
which dynamic fields? What to expect if a dynamic field is registered
after port start (the field is registered, but most likely not filled in)?
What to expect on port restart?


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-12 12:23     ` Jerin Jacob Kollanukkaran
@ 2019-07-16  9:39       ` Olivier Matz
  2019-07-16 14:43         ` Stephen Hemminger
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-07-16  9:39 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran; +Cc: Stephen Hemminger, dev

On Fri, Jul 12, 2019 at 12:23:19PM +0000, Jerin Jacob Kollanukkaran wrote:
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Olivier Matz
> > Sent: Thursday, July 11, 2019 1:07 PM
> > To: Stephen Hemminger <stephen@networkplumber.org>
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > 
> > On Wed, Jul 10, 2019 at 10:49:17AM -0700, Stephen Hemminger wrote:
> > > On Wed, 10 Jul 2019 11:29:07 +0200
> > > Olivier Matz <olivier.matz@6wind.com> wrote:
> > >
> > > >  /**
> > > >   * Indicate that the metadata field in the mbuf is in use.
> > > > @@ -738,6 +741,8 @@ struct rte_mbuf {
> > > >  	 */
> > > >  	struct rte_mbuf_ext_shared_info *shinfo;
> > > >
> > > > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > > > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> 
> Since the mbuf size is fixed, What is the downside of union scheme[1] vs upside of proposed scheme
> 
> [1] Example like:
>         RTE_STD_C11
>         union {
>                 void *userdata;   /**< Can be used for external metadata */
>                 uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
>         };

In the particular case of userdata, the union is not an issue, it
just means that there are several ways to represent the same data.
If needed, it is possible to register a union as a dynamic field.

In other cases, like m->hash, having a union makes it impossible to
use several features of the union at the same time. This would be
solved by dynamic fields.

> # The fields like mbuf: hash.usr, used in variety  of use case together
> Like libraries like distributor() and Eventdev using it. If we switch
> to dynamic mbuf scheme, We wil take those field using rte_mbuf_dynfield_register()
> on library init?

If we decide that these fields must be converted to a dynamic field,
yes, each library/application will call rte_mbuf_dynfield_register().

> # I see an upside of dynamic mbuf if we can add rte_mbuf_dynfield_unregister API.
> But can we ever do that? Because it will be complex if we need introduce notification mechanism etc.

An unregister mechanism seems hard to implement, or we can leave the
hard part to the user: either ensure that no mbuf is in use anywhere, or
that removing the dynamic field won't have any impact. But I'd prefer
not introducing an unregistration function until we have a real use-case
for it.

> # In the real world use case, if with union scheme, fastpath API can simply deference 
> specific element (say mbuf->fieldx). With dynamic scheme, the offset need to store
> in some other data structure  and de reference in fastpath before assessing the interested field.
> Right?

Yes, with dynamic fields, the offset is stored in a variable. A global
variable (static to the file or module using it) does the job. This may
have a small performance impact.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-12 14:54 ` Andrew Rybchenko
@ 2019-07-16  9:49   ` Olivier Matz
  2019-07-16 11:31     ` [dpdk-dev] ***Spam*** " Andrew Rybchenko
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-07-16  9:49 UTC (permalink / raw)
  To: Andrew Rybchenko; +Cc: dev

On Fri, Jul 12, 2019 at 05:54:57PM +0300, Andrew Rybchenko wrote:
> On 10.07.2019 12:29, Olivier Matz wrote:
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >    given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> 
> I like the idea.
> 
> I think it would be very useful to measure performance impact. Since it is
> core structure which is heavily used on datapath, performance impact is
> required to make decision to go or not to go. If acceptable, more fields
> can be converted to dynamic: timestamp, user data, sequence number,
> timesync data etc.

Agree. I'll try to do this in the coming days.

> Rules on which fields should be static and which
> dynamic are required. Timestamp, for example, is located in the first
> cache line. Do we need a way prioritize some dynamic fields to be located
> (if possible) in the first cache line? Or is it better simply move some
> static
> to the first cache line instead?

There is a "flags" argument, which is designed for this purpose. Today,
there is no room in the first cache line, but as soon as we remove
something from it, we can add a flag to ask to register a dynamic field
in the first cache line.

> I think rules should be better defined and imposed, if possible, when
> dynamic fields may be registered. Which entities are allowed to register
> dynamic fields?

I think there is no restriction. Library, PMD, App can register their
dynamic fields as soon as there is room for it.

> Do we need to keep track which entity has registered
> which dynamic fields?

This looks quite difficult to me. Most of the time, a dynamic field will be
registered in several places. Only the first registration is effective;
the others will just get the offset.

But at least we could add a log in the registration function.

> What to expect if a dynamic field is registered
> after port start (the field is registered, but most likely not filled in)?
> What to expect on port restart?

Registration of dynamic field can be done at any moment.

But to register a field that will be used by a PMD, we need to ask for
the feature at port configuration (usually through ethdev). Then the PMD
will register the dynamic field. If it fails, the configuration of the
port should fail.

The application that will access the field will also register it. It
can be done before or after the PMD initialization.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] ***Spam*** Re: [RFC] mbuf: support dynamic fields and flags
  2019-07-16  9:49   ` Olivier Matz
@ 2019-07-16 11:31     ` Andrew Rybchenko
  0 siblings, 0 replies; 64+ messages in thread
From: Andrew Rybchenko @ 2019-07-16 11:31 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

On 7/16/19 12:49 PM, Olivier Matz wrote:
> On Fri, Jul 12, 2019 at 05:54:57PM +0300, Andrew Rybchenko wrote:
>> On 10.07.2019 12:29, Olivier Matz wrote:
>>> Many features require to store data inside the mbuf. As the room in mbuf
>>> structure is limited, it is not possible to have a field for each
>>> feature. Also, changing fields in the mbuf structure can break the API
>>> or ABI.
>>>
>>> This commit addresses these issues, by enabling the dynamic registration
>>> of fields or flags:
>>>
>>> - a dynamic field is a named area in the rte_mbuf structure, with a
>>>     given size (>= 1 byte) and alignment constraint.
>>> - a dynamic flag is a named bit in the rte_mbuf structure.
>>>
>>> The typical use case is a PMD that registers space for an offload
>>> feature, when the application requests to enable this feature.  As
>>> the space in mbuf is limited, the space should only be reserved if it
>>> is going to be used (i.e when the application explicitly asks for it).
>>>
>>> The registration can be done at any moment, but it is not possible
>>> to unregister fields or flags for now.
>>>
>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>> I like the idea.
>>
>> I think it would be very useful to measure performance impact. Since it is
>> core structure which is heavily used on datapath, performance impact is
>> required to make decision to go or not to go. If acceptable, more fields
>> can be converted to dynamic: timestamp, user data, sequence number,
>> timesync data etc.
> Agree. I'll try to do this in the coming days.
>
>> Rules on which fields should be static and which
>> dynamic are required. Timestamp, for example, is located in the first
>> cache line. Do we need a way prioritize some dynamic fields to be located
>> (if possible) in the first cache line? Or is it better simply move some
>> static
>> to the first cache line instead?
> There is a "flags" argument, which is designed for this purpose. Today,
> there is no room in the first cache line, but as soon as we remove
> something from it, we can add a flag to ask to register a dynamic field
> in the first cache line.
>
>> I think rules should be better defined and imposed, if possible, when
>> dynamic fields may be registered. Which entities are allowed to register
>> dynamic fields?
> I think there is no restriction. Library, PMD, App can register their
> dynamic fields as soon as there is room for it.

I see that the API itself has no restrictions, but the goal is to have
something working, and it is very easy to break things with
dynamic fields and flags. Maybe the obvious requirements are
sufficient (e.g. a field should be registered before lookup to be found
by lookup), but it gets more complicated when drivers,
core libraries and applications come into play with their life
cycles. But maybe it is really out of scope for the API description.

Thanks.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
  2019-07-16  9:39       ` Olivier Matz
@ 2019-07-16 14:43         ` Stephen Hemminger
  0 siblings, 0 replies; 64+ messages in thread
From: Stephen Hemminger @ 2019-07-16 14:43 UTC (permalink / raw)
  To: Olivier Matz; +Cc: Jerin Jacob Kollanukkaran, dev

On Tue, 16 Jul 2019 11:39:50 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

> On Fri, Jul 12, 2019 at 12:23:19PM +0000, Jerin Jacob Kollanukkaran wrote:
> > > -----Original Message-----
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Olivier Matz
> > > Sent: Thursday, July 11, 2019 1:07 PM
> > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [RFC] mbuf: support dynamic fields and flags
> > > 
> > > On Wed, Jul 10, 2019 at 10:49:17AM -0700, Stephen Hemminger wrote:  
> > > > On Wed, 10 Jul 2019 11:29:07 +0200
> > > > Olivier Matz <olivier.matz@6wind.com> wrote:
> > > >  
> > > > >  /**
> > > > >   * Indicate that the metadata field in the mbuf is in use.
> > > > > @@ -738,6 +741,8 @@ struct rte_mbuf {
> > > > >  	 */
> > > > >  	struct rte_mbuf_ext_shared_info *shinfo;
> > > > >
> > > > > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > > > > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */  
> > 
> > Since the mbuf size is fixed, What is the downside of union scheme[1] vs upside of proposed scheme
> > 
> > [1] Example like:
> >         RTE_STD_C11
> >         union {
> >                 void *userdata;   /**< Can be used for external metadata */
> >                 uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
> >         };  
> 
> In the particular case of userdata, the union is not an issue, it
> just means that there are several ways to represent the same data.
> If needed, it is possible to register a union as a dynamic field.
> 
> In other case, like m->hash, having a union makes it impossible to
> use several features of the union at the same time. This would be
> solved by dynamic fields.
> 
> > # The fields like mbuf: hash.usr, used in variety  of use case together
> > Like libraries like distributor() and Eventdev using it. If we switch
> > to dynamic mbuf scheme, We wil take those field using rte_mbuf_dynfield_register()
> > on library init?  
> 
> If we decide that these fields must be converted to a dynamic field,
> yes, each library/application will call rte_mbuf_dynfield_register().
> 
> > # I see an upside of dynamic mbuf if we can add rte_mbuf_dynfield_unregister API.
> > But can we ever do that? Because it will be complex if we need introduce notification mechanism etc.  
> 
> An unregister mechanism seems hard to implement, or we can leave the
> hard part to the user: either ensure that no mbuf is in use anywhere, or
> that removing the dynamic field won't have any impact. But I'd prefer
> not introducing an unregistration function until we have a real use-case
> for it.
> 
> > # In the real world use case, if with union scheme, fastpath API can simply deference 
> > specific element (say mbuf->fieldx). With dynamic scheme, the offset need to store
> > in some other data structure  and de reference in fastpath before assessing the interested field.
> > Right?  
> 
> Yes, with dynamic fields, the offset is stored in a variable. A global
> variable (static to the file or module using it) does the job. This may
> have a small performance impact.
> 

Applications are already using userdata; reusing that in a driver
would cause a worse disaster than breaking ABI.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
                   ` (3 preceding siblings ...)
  2019-07-12 14:54 ` Andrew Rybchenko
@ 2019-09-18 16:54 ` Olivier Matz
  2019-09-21  4:54   ` Wang, Haiyue
                     ` (2 more replies)
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
                   ` (2 subsequent siblings)
  7 siblings, 3 replies; 64+ messages in thread
From: Olivier Matz @ 2019-09-18 16:54 UTC (permalink / raw)
  To: dev
  Cc: Thomas Monjalon, Haiyue Wang, Stephen Hemminger,
	Andrew Rybchenko, Keith Wiles, Jerin Jacob Kollanukkaran

Many features require storing data inside the mbuf. As the room in the mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
  given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e. when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags for now.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
---

rfc -> v1

* Rebase on top of master
* Change registration API to use a structure instead of
  variables, getting rid of #defines (Stephen's comment)
* Update flag registration to use a similar API as fields.
* Change max name length from 32 to 64 (sugg. by Thomas)
* Enhance API documentation (Haiyue's and Andrew's comments)
* Add a debug log at registration
* Add some words in release note
* Did some performance tests (sugg. by Andrew):
  On my platform, reading a dynamic field takes ~3 cycles more
  than a static field, and ~2 cycles more for writing.

 app/test/test_mbuf.c                   | 114 ++++++-
 doc/guides/rel_notes/release_19_11.rst |   7 +
 lib/librte_mbuf/Makefile               |   2 +
 lib/librte_mbuf/meson.build            |   6 +-
 lib/librte_mbuf/rte_mbuf.h             |  25 +-
 lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
 lib/librte_mbuf/rte_mbuf_version.map   |   4 +
 8 files changed, 724 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 2a97afe20..96acfc4b2 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -28,6 +28,7 @@
 #include <rte_random.h>
 #include <rte_cycles.h>
 #include <rte_malloc.h>
+#include <rte_mbuf_dyn.h>
 
 #include "test.h"
 
@@ -502,7 +503,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
 		rte_pktmbuf_free(clone2);
 	return -1;
 }
-#undef GOTO_FAIL
 
 /*
  * test allocation and free of mbufs
@@ -1121,6 +1121,112 @@ test_tx_offload(void)
 	return (v1 == v2) ? 0 : -EINVAL;
 }
 
+static int
+test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
+{
+	const struct rte_mbuf_dynfield dynfield = {
+		.name = "test-dynfield",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield2 = {
+		.name = "test-dynfield2",
+		.size = sizeof(uint16_t),
+		.align = __alignof__(uint16_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_big = {
+		.name = "test-dynfield-fail-big",
+		.size = 256,
+		.align = 1,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_align = {
+		.name = "test-dynfield-fail-align",
+		.size = 1,
+		.align = 3,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag = {
+		.name = "test-dynflag",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag2 = {
+		.name = "test-dynflag2",
+		.flags = 0,
+	};
+	struct rte_mbuf *m = NULL;
+	int offset, offset2;
+	int flag, flag2;
+
+	printf("Test mbuf dynamic fields and flags\n");
+
+	offset = rte_mbuf_dynfield_register(&dynfield);
+	if (offset == -1)
+		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
+			offset, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register(&dynfield);
+	if (offset2 != offset)
+		GOTO_FAIL("failed to lookup dynamic field, offset=%d, offset2=%d: %s",
+			offset, offset2, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register(&dynfield2);
+	if (offset2 == -1 || offset2 == offset || (offset & 1))
+		GOTO_FAIL("failed to register dynfield field 2, offset=%d, offset2=%d: %s",
+			offset, offset2, strerror(errno));
+
+	printf("dynfield: offset = %d, offset2 = %d\n", offset, offset2);
+
+	offset = rte_mbuf_dynfield_register(&dynfield_fail_big);
+	if (offset != -1)
+		GOTO_FAIL("dynamic field creation should fail (too big)");
+
+	offset = rte_mbuf_dynfield_register(&dynfield_fail_align);
+	if (offset != -1)
+		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
+
+	flag = rte_mbuf_dynflag_register(&dynflag);
+	if (flag == -1)
+		GOTO_FAIL("failed to register dynamic field, flag=%d: %s",
+			flag, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register(&dynflag);
+	if (flag2 != flag)
+		GOTO_FAIL("failed to lookup dynamic field, flag=%d, flag2=%d: %s",
+			flag, flag2, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register(&dynflag2);
+	if (flag2 == -1 || flag2 == flag)
+		GOTO_FAIL("failed to register dynflag field 2, flag=%d, flag2=%d: %s",
+			flag, flag2, strerror(errno));
+
+	printf("dynflag: flag = %d, flag2 = %d\n", flag, flag2);
+
+	/* set, get dynamic field */
+	m = rte_pktmbuf_alloc(pktmbuf_pool);
+	if (m == NULL)
+		GOTO_FAIL("Cannot allocate mbuf");
+
+	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
+	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
+		GOTO_FAIL("failed to read dynamic field");
+	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
+	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
+		GOTO_FAIL("failed to read dynamic field");
+
+	/* set a dynamic flag */
+	m->ol_flags |= (1ULL << flag);
+
+	rte_pktmbuf_free(m);
+	return 0;
+fail:
+	rte_pktmbuf_free(m);
+	return -1;
+}
+#undef GOTO_FAIL
+
 static int
 test_mbuf(void)
 {
@@ -1140,6 +1246,12 @@ test_mbuf(void)
 		goto err;
 	}
 
+	/* test registration of dynamic fields and flags */
+	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
+		printf("mbuf dynflag test failed\n");
+		goto err;
+	}
+
 	/* create a specific pktmbuf pool with a priv_size != 0 and no data
 	 * room size */
 	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 27cfbd9e3..0fcb76f76 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -56,6 +56,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Add support for dynamic fields and flags in mbuf.**
+
+  This new feature adds the ability to dynamically register some room
+  for a field or a flag in the mbuf structure. This is typically used
+  for specific offload features, where adding a static field or flag
+  in the mbuf is not justified.
+
 
 Removed Items
 -------------
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index c8f6d2689..5a9bcee73 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -17,8 +17,10 @@ LIBABIVER := 5
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 6cc11ebb4..9137e8f26 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,8 +2,10 @@
 # Copyright(c) 2017 Intel Corporation
 
 version = 5
-sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
-headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
+sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
+	'rte_mbuf_dyn.c')
+headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
+	'rte_mbuf_dyn.h')
 deps += ['mempool']
 
 allow_experimental_apis = true
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 98225ec80..ef588cd54 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -198,9 +198,12 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
 
-/* add new RX flags here */
+/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-/* add new TX flags here */
+#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_LAST_FREE (1ULL << 39)
+
+/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
 
 /**
  * Indicate that the metadata field in the mbuf is in use.
@@ -738,6 +741,8 @@ struct rte_mbuf {
 	 */
 	struct rte_mbuf_ext_shared_info *shinfo;
 
+	uint64_t dynfield1; /**< Reserved for dynamic fields. */
+	uint64_t dynfield2; /**< Reserved for dynamic fields. */
 } __rte_cache_aligned;
 
 /**
@@ -1684,6 +1689,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
  */
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
+/**
+ * Copy dynamic fields from m_src to m_dst.
+ *
+ * @param m_dst
+ *   The destination mbuf.
+ * @param m_src
+ *   The source mbuf.
+ */
+static inline void
+rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
+{
+	m_dst->dynfield1 = m_src->dynfield1;
+	m_dst->dynfield2 = m_src->dynfield2;
+}
+
 /**
  * Attach packet mbuf to another packet mbuf.
  *
@@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 	mi->vlan_tci_outer = m->vlan_tci_outer;
 	mi->tx_offload = m->tx_offload;
 	mi->hash = m->hash;
+	rte_mbuf_dynfield_copy(mi, m);
 
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
new file mode 100644
index 000000000..13b8742d0
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
+
+#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
+
+struct mbuf_dynfield_elt {
+	TAILQ_ENTRY(mbuf_dynfield_elt) next;
+	struct rte_mbuf_dynfield params;
+	int offset;
+};
+TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynfield_tailq = {
+	.name = "RTE_MBUF_DYNFIELD",
+};
+EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
+
+struct mbuf_dynflag_elt {
+	TAILQ_ENTRY(mbuf_dynflag_elt) next;
+	struct rte_mbuf_dynflag params;
+	int bitnum;
+};
+TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynflag_tailq = {
+	.name = "RTE_MBUF_DYNFLAG",
+};
+EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
+
+struct mbuf_dyn_shm {
+	/** For each mbuf byte, free_space[i] == 1 if space is free. */
+	uint8_t free_space[sizeof(struct rte_mbuf)];
+	/** Bitfield of available flags. */
+	uint64_t free_flags;
+};
+static struct mbuf_dyn_shm *shm;
+
+/* allocate and initialize the shared memory */
+static int
+init_shared_mem(void)
+{
+	const struct rte_memzone *mz;
+	uint64_t mask;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
+						sizeof(struct mbuf_dyn_shm),
+						SOCKET_ID_ANY, 0,
+						RTE_CACHE_LINE_SIZE);
+	} else {
+		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
+	}
+	if (mz == NULL)
+		return -1;
+
+	shm = mz->addr;
+
+#define mark_free(field)						\
+	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
+		0xff, sizeof(((struct rte_mbuf *)0)->field))
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* init free_space, keep it sync'd with
+		 * rte_mbuf_dynfield_copy().
+		 */
+		memset(shm, 0, sizeof(*shm));
+		mark_free(dynfield1);
+		mark_free(dynfield2);
+
+		/* init free_flags */
+		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
+			shm->free_flags |= mask;
+	}
+#undef mark_free
+
+	return 0;
+}
+
+/* check if this offset can be used */
+static int
+check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
+{
+	size_t i;
+
+	(void)flags;
+
+	if ((offset & (align - 1)) != 0)
+		return -1;
+	if (offset + size > sizeof(struct rte_mbuf))
+		return -1;
+
+	for (i = 0; i < size; i++) {
+		if (!shm->free_space[i + offset])
+			return -1;
+	}
+
+	return 0;
+}
+
+/* assume tailq is locked */
+static struct mbuf_dynfield_elt *
+__mbuf_dynfield_lookup(const char *name)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+	struct rte_tailq_entry *te;
+
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
+		if (strcmp(name, mbuf_dynfield->params.name) == 0)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mbuf_dynfield;
+}
+
+int
+rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
+{
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+
+	if (shm == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	rte_mcfg_tailq_read_lock();
+	mbuf_dynfield = __mbuf_dynfield_lookup(name);
+	rte_mcfg_tailq_read_unlock();
+
+	if (mbuf_dynfield == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
+
+	return mbuf_dynfield->offset;
+}
+
+static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
+		const struct rte_mbuf_dynfield *params2)
+{
+	if (strcmp(params1->name, params2->name))
+		return -1;
+	if (params1->size != params2->size)
+		return -1;
+	if (params1->align != params2->align)
+		return -1;
+	if (params1->flags != params2->flags)
+		return -1;
+	return 0;
+}
+
+int
+rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
+	struct rte_tailq_entry *te = NULL;
+	int offset, ret;
+	size_t i;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		goto fail;
+	if (params->size >= sizeof(struct rte_mbuf)) {
+		rte_errno = EINVAL;
+		goto fail;
+	}
+	if (!rte_is_power_of_2(params->align)) {
+		rte_errno = EINVAL;
+		goto fail;
+	}
+	if (params->flags != 0) {
+		rte_errno = EINVAL;
+		goto fail;
+	}
+
+	rte_mcfg_tailq_write_lock();
+
+	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
+	if (mbuf_dynfield != NULL) {
+		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
+			rte_errno = EEXIST;
+			goto fail_unlock;
+		}
+		offset = mbuf_dynfield->offset;
+		goto out_unlock;
+	}
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		goto fail_unlock;
+	}
+
+	for (offset = 0;
+	     offset < (int)sizeof(struct rte_mbuf);
+	     offset++) {
+		if (check_offset(offset, params->size, params->align,
+					params->flags) == 0)
+			break;
+	}
+
+	if (offset == sizeof(struct rte_mbuf)) {
+		rte_errno = ENOENT;
+		goto fail_unlock;
+	}
+
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL)
+		goto fail_unlock;
+
+	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
+	if (mbuf_dynfield == NULL)
+		goto fail_unlock;
+
+	ret = strlcpy(mbuf_dynfield->params.name, params->name,
+		sizeof(mbuf_dynfield->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
+		rte_errno = ENAMETOOLONG;
+		goto fail_unlock;
+	}
+	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
+	mbuf_dynfield->offset = offset;
+	te->data = mbuf_dynfield;
+
+	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
+
+	for (i = offset; i < offset + params->size; i++)
+		shm->free_space[i] = 0;
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %d\n",
+		params->name, params->size, params->align, params->flags,
+		offset);
+
+out_unlock:
+	rte_mcfg_tailq_write_unlock();
+
+	return offset;
+
+fail_unlock:
+	rte_mcfg_tailq_write_unlock();
+fail:
+	rte_free(mbuf_dynfield);
+	rte_free(te);
+	return -1;
+}
+
+/* assume tailq is locked */
+static struct mbuf_dynflag_elt *
+__mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag;
+	struct rte_tailq_entry *te;
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
+		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
+		if (strncmp(name, mbuf_dynflag->params.name,
+				RTE_MBUF_DYN_NAMESIZE) == 0)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mbuf_dynflag;
+}
+
+int
+rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params)
+{
+	struct mbuf_dynflag_elt *mbuf_dynflag;
+
+	if (shm == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	rte_mcfg_tailq_read_lock();
+	mbuf_dynflag = __mbuf_dynflag_lookup(name);
+	rte_mcfg_tailq_read_unlock();
+
+	if (mbuf_dynflag == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
+
+	return mbuf_dynflag->bitnum;
+}
+
+static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
+		const struct rte_mbuf_dynflag *params2)
+{
+	if (strcmp(params1->name, params2->name))
+		return -1;
+	if (params1->flags != params2->flags)
+		return -1;
+	return 0;
+}
+
+int
+rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
+	struct rte_tailq_entry *te = NULL;
+	int bitnum, ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		goto fail;
+
+	rte_mcfg_tailq_write_lock();
+
+	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
+	if (mbuf_dynflag != NULL) {
+		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
+			rte_errno = EEXIST;
+			goto fail_unlock;
+		}
+		bitnum = mbuf_dynflag->bitnum;
+		goto out_unlock;
+	}
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		goto fail_unlock;
+	}
+
+	if (shm->free_flags == 0) {
+		rte_errno = ENOENT;
+		goto fail_unlock;
+	}
+	bitnum = rte_bsf64(shm->free_flags);
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL)
+		goto fail_unlock;
+
+	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
+	if (mbuf_dynflag == NULL)
+		goto fail_unlock;
+
+	ret = strlcpy(mbuf_dynflag->params.name, params->name,
+		sizeof(mbuf_dynflag->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
+		rte_errno = ENAMETOOLONG;
+		goto fail_unlock;
+	}
+	mbuf_dynflag->bitnum = bitnum;
+	te->data = mbuf_dynflag;
+
+	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
+
+	shm->free_flags &= ~(1ULL << bitnum);
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
+		params->name, params->flags, bitnum);
+
+out_unlock:
+	rte_mcfg_tailq_write_unlock();
+
+	return bitnum;
+
+fail_unlock:
+	rte_mcfg_tailq_write_unlock();
+fail:
+	rte_free(mbuf_dynflag);
+	rte_free(te);
+	return -1;
+}
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
new file mode 100644
index 000000000..6e2c81654
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#ifndef _RTE_MBUF_DYN_H_
+#define _RTE_MBUF_DYN_H_
+
+/**
+ * @file
+ * RTE Mbuf dynamic fields and flags
+ *
+ * Many features require to store data inside the mbuf. As the room in
+ * mbuf structure is limited, it is not possible to have a field for
+ * each feature. Also, changing fields in the mbuf structure can break
+ * the API or ABI.
+ *
+ * This module addresses this issue, by enabling the dynamic
+ * registration of fields or flags:
+ *
+ * - a dynamic field is a named area in the rte_mbuf structure, with a
+ *   given size (>= 1 byte) and alignment constraint.
+ * - a dynamic flag is a named bit in the rte_mbuf structure, stored
+ *   in mbuf->ol_flags.
+ *
+ * The typical use case is when a specific offload feature needs to
+ * register a dedicated offload field in the mbuf structure, and adding
+ * a static field or flag is not justified.
+ *
+ * Example of use:
+ *
+ * - A rte_mbuf_dynfield structure is defined, containing the parameters
+ *   of the dynamic field to be registered:
+ *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
+ * - The application initializes the PMD, and asks for this feature
+ *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
+ *   rxconf. This makes the PMD register the field by calling
+ *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
+ *   stores the returned offset.
+ * - The application that uses the offload feature also registers
+ *   the field to retrieve the same offset.
+ * - When the PMD receives a packet, it can set the field:
+ *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
+ * - In the main loop, the application can retrieve the value with
+ *   the same macro.
+ *
+ * To avoid wasting space, the dynamic fields or flags must only be
+ * reserved on demand, when an application asks for the related feature.
+ *
+ * The registration can be done at any moment, but it is not possible
+ * to unregister fields or flags for now.
+ *
+ * A dynamic field can be reserved and used by an application only.
+ * It can for instance be a packet mark.
+ */
+
+#include <sys/types.h>
+/**
+ * Maximum size of a dynamic field or flag name, including the final \0.
+ */
+#define RTE_MBUF_DYN_NAMESIZE 64
+
+/**
+ * Parameters of an mbuf dynamic field (see rte_mbuf_dynfield_register()).
+ */
+struct rte_mbuf_dynfield {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
+	size_t size;        /**< The number of bytes to reserve. */
+	size_t align;       /**< The alignment constraint (power of 2). */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Parameters of an mbuf dynamic flag (see rte_mbuf_dynflag_register()).
+ */
+struct rte_mbuf_dynflag {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Register space for a dynamic field in the mbuf structure.
+ *
+ * If the field is already registered (same name and parameters), its
+ * offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, or flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
+
+/**
+ * Look up a registered dynamic mbuf field.
+ *
+ * @param name
+ *   A string identifying the dynamic field.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic field.
+ * @return
+ *   The offset of this field in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic field matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_lookup(const char *name,
+			struct rte_mbuf_dynfield *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure.
+ *
+ * If the flag is already registered (same name and parameters), its
+ * bit number is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and flags).
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
+
+/**
+ * Look up a registered dynamic mbuf flag.
+ *
+ * @param name
+ *   A string identifying the dynamic flag.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic flag.
+ * @return
+ *   The offset of this flag in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic flag matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params);
+
+/**
+ * Helper macro to access a dynamic field; type must be a pointer type.
+ */
+#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
+
+#endif
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index 2662a37bf..a98310570 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -50,4 +50,8 @@ EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_mbuf_dynfield_lookup;
+	rte_mbuf_dynfield_register;
+	rte_mbuf_dynflag_lookup;
+	rte_mbuf_dynflag_register;
 } DPDK_18.08;
-- 
2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
@ 2019-09-21  4:54   ` Wang, Haiyue
  2019-09-23  8:31     ` Olivier Matz
  2019-09-21  8:28   ` Wiles, Keith
  2019-10-01 10:49   ` Ananyev, Konstantin
  2 siblings, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-09-21  4:54 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Thomas Monjalon, Stephen Hemminger, Andrew Rybchenko, Wiles,
	Keith, Jerin Jacob Kollanukkaran

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Thursday, September 19, 2019 00:55
> To: dev@dpdk.org
> Cc: Thomas Monjalon <thomas@monjalon.net>; Wang, Haiyue <haiyue.wang@intel.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Andrew Rybchenko <arybchenko@solarflare.com>; Wiles, Keith
> <keith.wiles@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Subject: [PATCH] mbuf: support dynamic fields and flags
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 114 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  25 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
>  8 files changed, 724 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> 

[snip]

> +/**
> + * Helper macro to access to a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))

How about changing it to the following?
#define RTE_MBUF_DYNFIELD(m, offset, type) ((type *)((uintptr_t)(m) + (offset)))
                                                  ^
Then,
	*RTE_MBUF_DYNFIELD(mb, xxx, uint32_t) = yyy;

Since we already use 'type' as in sizeof(type) and __alignof__(type), this makes the use
of 'type' more consistent, and callers do not have to pass 'type *' when using the macro.

	const struct rte_mbuf_dynfield dynfield2 = {
		.name = "test-dynfield2",
		.size = sizeof(uint16_t),
		.align = __alignof__(uint16_t),
		.flags = 0,
	};

Also, while trying to use the dynamic flag, I found that a macro would be better
for keeping the code aligned with the dynamic field usage. Just a small suggestion. ;-)
	mb->ol_flags |= RTE_MBUF_DYNFLAG(ol_offset);

/**
 * Helper macro to access to a dynamic flag.
 */
#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))

> +
> +#endif
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
> index 2662a37bf..a98310570 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -50,4 +50,8 @@ EXPERIMENTAL {
>  	global:
> 
>  	rte_mbuf_check;
> +	rte_mbuf_dynfield_lookup;
> +	rte_mbuf_dynfield_register;
> +	rte_mbuf_dynflag_lookup;
> +	rte_mbuf_dynflag_register;
>  } DPDK_18.08;
> --
> 2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
  2019-09-21  4:54   ` Wang, Haiyue
@ 2019-09-21  8:28   ` Wiles, Keith
  2019-09-23  8:56     ` Morten Brørup
  2019-09-23  9:13     ` Olivier Matz
  2019-10-01 10:49   ` Ananyev, Konstantin
  2 siblings, 2 replies; 64+ messages in thread
From: Wiles, Keith @ 2019-09-21  8:28 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran



> On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>  given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> —
> 

I am not a big fan of the idea of registering space in the mbuf. I did like Konstantin’s suggestion of having the compiler help with optimizing the code, but with a slight difference. Maybe I misunderstand, but with this design you have to pass the offsets to different parts of the application, place them in global memory, or have each section request the offsets. It seems great if the application is one big application or an appliance-model application having control of the whole design, but not so good for service-chain-like designs where different parts of the whole application are designed by different teams.

Konstantin’s suggestion, if I understand it, was to use structures to allow the compiler to optimize the access to the mbuf, and I like that idea, but with one change: we add a field in the mbuf to define the mbuf structure type.

Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we have today with maybe the optional fields set to reserved or some type of filler variables to reserve the holes in the structure. Then type 1 is the IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related data with the standard mbuf fields still matching the type 0 version.

This allows the mbuf to be used by the developer and the compiler now knows exactly where the fields are located in the structure and does not have to deal with any of the macros and offsets and registration suggested here. Just cast the mbuf pointer into the new type mbuf structure. We just have to make sure the code that needs to use a given mbuf type has access to the structure definitions.

If the mbuf is going to be translated from one mbuf type to another mbuf type, we just have to define that type and then cast the mbuf pointer to that structure. When an mbuf is received from the IPSec PMD and the application needs to forward that mbuf to the next stage, it can reset the type to 0 or to another type, filling in the reserved fields to be used by the next stage in the pipeline.

The mbuf now contains the type and every point in the application can look at the type to determine how that mbuf is defined. I am sure there are some holes here, but I think it is a better solution then using all of these macros, offset values and registration APIs.


Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-21  4:54   ` Wang, Haiyue
@ 2019-09-23  8:31     ` Olivier Matz
  2019-09-23 11:01       ` Wang, Haiyue
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-09-23  8:31 UTC (permalink / raw)
  To: Wang, Haiyue
  Cc: dev, Thomas Monjalon, Stephen Hemminger, Andrew Rybchenko, Wiles,
	Keith, Jerin Jacob Kollanukkaran

Hi,

On Sat, Sep 21, 2019 at 04:54:39AM +0000, Wang, Haiyue wrote:
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Thursday, September 19, 2019 00:55
> > To: dev@dpdk.org
> > Cc: Thomas Monjalon <thomas@monjalon.net>; Wang, Haiyue <haiyue.wang@intel.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Andrew Rybchenko <arybchenko@solarflare.com>; Wiles, Keith
> > <keith.wiles@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> > Subject: [PATCH] mbuf: support dynamic fields and flags
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > 
> > rfc -> v1
> > 
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> > 
> >  app/test/test_mbuf.c                   | 114 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  25 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
> >  8 files changed, 724 insertions(+), 5 deletions(-)
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > 
> 
> [snip]
> 
> > +/**
> > + * Helper macro to access to a dynamic field.
> > + */
> > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> 
> How about to change it as: ?
> #define RTE_MBUF_DYNFIELD(m, offset, type) ((type *)((uintptr_t)(m) + (offset)))
>                                                   ^
> Then,
> 	*RTE_MBUF_DYNFIELD(mb, xxx, uint32_t) = yyy;
> 
> Since we use 'type' like: sizeof(type), __alignof__(type), this makes 'type' be
> more consistent, not have to force cast 'type *' when using it.
> 
> 	const struct rte_mbuf_dynfield dynfield2 = {
> 		.name = "test-dynfield2",
> 		.size = sizeof(uint16_t),
> 		.align = __alignof__(uint16_t),
> 		.flags = 0,
> 	};

Yes, I don't see use cases where the '*' is omitted, so it could be in the
macro. On the other hand, doing like in the patch is more consistent with
similar macros like rte_pktmbuf_mtod(), so I'll tend to keep it as is.

This is maybe not that important, because this macro will often be hidden
in a wrapper, like below:

  static inline uint64_t rte_mbuf_dyn_timestamp_get(const struct rte_mbuf *m)
  {
         return *RTE_MBUF_DYNFIELD(m, rte_mbuf_dynfield_timestamp_offset,
                                 uint64_t *);
  }


> And also, when I'm trying to use the dynamic flag, found a macro will be better
> for making code align with dynamic field. Just a small suggestion. ;-)
> 	mb->ol_flags |= RTE_MBUF_DYNFLAG(ol_offset);
> 
> /**
>  * Helper macro to access to a dynamic flag.
>  */
> #define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))

OK, I will add it in next version.



Thank you for the feedback!

Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-21  8:28   ` Wiles, Keith
@ 2019-09-23  8:56     ` Morten Brørup
  2019-09-23  9:41       ` Olivier Matz
  2019-09-23  9:13     ` Olivier Matz
  1 sibling, 1 reply; 64+ messages in thread
From: Morten Brørup @ 2019-09-23  8:56 UTC (permalink / raw)
  To: Wiles, Keith, Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran, bruce.richardson

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wiles, Keith
> Sent: Saturday, September 21, 2019 10:29 AM
> 
> > On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com>
> wrote:
> >
> > Many features require to store data inside the mbuf. As the room in
> mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the
> API
> > or ABI.
> >
> > This commit addresses these issues, by enabling the dynamic
> registration
> > of fields or flags:
> >
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >  given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> >
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for
> it).
> >
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> >
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > —
> >
> 
> The idea of registration for space in the mbuf I am not a big fan. I
> did like Konstantin’s suggestion of having the compiler help with
> optimizing the code, but with a slight difference. Maybe I
> misunderstand, but now with this design you have to pass the offsets to
> different parts of the application or place in global memory or have
> each section request the offsets. It seems great if the application is
> one big application or an appliance model application having control of
> the whole design not so good for service chains like designs where
> different parts of the whole application is design by different teams.
> 
> Konstantin’s suggest if I understand it was to use structures to allow
> the compiler to optimize the access to the mbuf and I like that idea,
> but with one change we add a field in the mbuf to define the mbuf
> structure type.
> 
> Say 0 is the standard rte_mbuf type then type 1 could be the IPSec
> offset type mbuf, type 2 could be something else, … The type 0 looks
> just like the mbuf we have today with maybe the optional fields set to
> reserved or some type of filler variables to reserve the holes in the
> structure. Then type 1 is the IPSec mbuf and in the reserved sections
> of the mbuf contain the IPSec related data with the standard mbuf
> fields still matching the type 0 version.
> 
> This allows the mbuf to be used by the developer and the compiler now
> knows exactly where the fields are located in the structure and does
> not have to deal with any of the macros and offsets and registration
> suggested here. Just cast the mbuf pointer into the new type mbuf
> structure. We just have to make sure the code that needs to use a given
> mbuf type has access to the structure definitions.
> 
> If the mbufs it going to be translated from one type mbuf to another
> mbuf type, we just have to define that type and then cast the mbuf
> pointer to that structure. When an mbuf is received from IPSec PMD then
> the application needs to forward that mbuf to the next stage it can
> reset the type to 0 or to another type filling in the reserved fields
> to be used by the next stage in the pipeline.
> 
> The mbuf now contains the type and every point in the application can
> look at the type to determine how that mbuf is defined. I am sure there
> are some holes here, but I think it is a better solution then using all
> of these macros, offset values and registration APIs.
> 
> 
> Regards,
> Keith

First of all, I applaud the idea of cleaning up the mbuf structure and removing the fields only rarely used and/or for special use cases only, as mentioned in the presentation, e.g. timestamp, timesync and seqn. It is great seeing serious effort put into improving the very core of DPDK!

However, after some hallway discussions at DPDK Userspace and further thinking about the details, I can see two additional risks by introducing dynamic mbufs, which I would like to share for your consideration:

1. It may prevent us from adding future solutions not yet considered.

If we were to introduce new functions for more granular handling of mbufs, similar to some of the Linux kernel's skbuff handling functions, how should such functions handle the dynamic fields? And how is the rte_pktmbuf_clone() function supposed to handle the dynamic fields? Some fields may need to be copied as-is, some may need to be initialized to zero or some other value, and so on. It is apparently not a problem now; but dynamic mbufs may prevent us from adding some of such functions in the future.

I admit that I can only imagine the issue on an abstract level, so I can't give you a concrete example. Perhaps some of the more experienced Linux developers can provide one or debunk my concern. (Stephen: In relation to packet capturing we were discussing the reference counter not being respected by some applications, and the possible need for more granular mbuf handling.)

2. In the long run, we might end up adding more fields than we remove.

Dynamic mbufs make it easier to add specialized fields, which is great. But when the barrier to adding specialized fields is reduced, more PMDs and libraries may add their own unique fields, rather than going through a discussion and consensus for adding them to the fixed mbuf structure or finding some other solution. And if a multitude of PMDs each need a specialized field, PMDs might end up adding variants of a field, rather than reaching consensus for a common standard. It will be much easier to just add your own specialized mbuf field rather than having to go through the standardization process in the DPDK community.

I will use the timestamp field as a theoretic example: The packet timestamp measurement unit differs between vendors, so with dynamic mbufs one vendor's PMD might create a timestamp_ns field, counting in nanoseconds, and another vendor's PMD might create a timestamp_clocks field, counting in clock counts. With the fixed mbuf, this triggered a public discussion, and a compromise for the field was reached.


Although I am worried about dynamic mbufs, I don't have a better suggestion. And perhaps I just worry too much.


Med venlig hilsen / kind regards
- Morten Brørup


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-21  8:28   ` Wiles, Keith
  2019-09-23  8:56     ` Morten Brørup
@ 2019-09-23  9:13     ` Olivier Matz
  2019-09-23 15:14       ` Wiles, Keith
  2019-09-23 16:09       ` Wiles, Keith
  1 sibling, 2 replies; 64+ messages in thread
From: Olivier Matz @ 2019-09-23  9:13 UTC (permalink / raw)
  To: Wiles, Keith
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran

Hi Keith,

On Sat, Sep 21, 2019 at 08:28:32AM +0000, Wiles, Keith wrote:
> 
> 
> > On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com> wrote:
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >  given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > —
> > 
> 

> The idea of registration for space in the mbuf I am not a big fan. I did like
> Konstantin’s suggestion of having the compiler help with optimizing the code,
> but with a slight difference. Maybe I misunderstand, but now with this design
> you have to pass the offsets to different parts of the application or place in
> global memory or have each section request the offsets. It seems great if the
> application is one big application or an appliance model application having
> control of the whole design not so good for service chains like designs where
> different parts of the whole application is design by different teams.

If the global variable storing the offset is defined in the mbuf layer, what
would be the problem?

The only things you would have to do is:

1/ ensure the offset is registered
   rte_mbuf_dyn_timestamp_register()

2/ use helpers
   rte_mbuf_dyn_timestamp_get(), rte_mbuf_dyn_timestamp_set(), ...

> Konstantin’s suggest if I understand it was to use structures to allow the
> compiler to optimize the access to the mbuf and I like that idea, but with one
> change we add a field in the mbuf to define the mbuf structure type.
>
> Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type
> mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we
> have today with maybe the optional fields set to reserved or some type of
> filler variables to reserve the holes in the structure. Then type 1 is the
> IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related
> data with the standard mbuf fields still matching the type 0 version.

This looks very much like the "selective layout" in our presentation [1], page 14.

Your example talks about IPsec, but someone else will want to use a
sequence number, another one a timestamp, and another one will want to
use this space for its own application. There are a lot of use cases,
and it does not scale to have a layout for each of them. Worse, if
someone wants IPsec + a sequence number, how can it work?

One of the problems to solve is to avoid mutually exclusive features (i.e.
union of fields that cannot be used together in the mbuf).

> This allows the mbuf to be used by the developer and the compiler now knows
> exactly where the fields are located in the structure and does not have to
> deal with any of the macros and offsets and registration suggested here. Just
> cast the mbuf pointer into the new type mbuf structure. We just have to make
> sure the code that needs to use a given mbuf type has access to the structure
> definitions.

With the current proposal, we can imagine an API to ask to register a
field at a specific offset. It can then be used in the application, so
that accesses are done at no cost compared to a static field, because
the offset would be const.

In the driver, the same logic could be used, but dynamically:

  if (offset == PREFERRED_OFFSET) {
    /* code with static offset */
  } else {
    /* generic code */
  }

But I'm not sure it would scale a lot if there are several features
using dynamic fields.

> If the mbufs it going to be translated from one type mbuf to another mbuf
> type, we just have to define that type and then cast the mbuf pointer to that
> structure. When an mbuf is received from IPSec PMD then the application needs
> to forward that mbuf to the next stage it can reset the type to 0 or to
> another type filling in the reserved fields to be used by the next stage in
> the pipeline.

What you describe is one use case.

What could be done with the API mentioned above (but I think it is
dangerous), is to allow a user to register 2 different fields at the
same offset, using a specific flag. This could work if the user knows
that these 2 fields are never used at the same time.

> The mbuf now contains the type and every point in the application can look at
> the type to determine how that mbuf is defined. I am sure there are some holes
> here, but I think it is a better solution then using all of these macros,
> offset values and registration APIs.

I'm not convinced having selective layouts is doable. The layouts cannot
fit all possible use cases, and managing the different layouts in the
driver looks difficult to me. Additionally, it does not solve the
problem of mutually exclusive features.


Thanks for the feedback.
Olivier

[1] https://static.sched.com/hosted_files/dpdkbordeaux2019/2b/dpdk-201909-dyn-mbuf.pdf

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23  8:56     ` Morten Brørup
@ 2019-09-23  9:41       ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-09-23  9:41 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Wiles, Keith, dev, Thomas Monjalon, Wang, Haiyue,
	Stephen Hemminger, Andrew Rybchenko, Jerin Jacob Kollanukkaran,
	bruce.richardson

Hi Morten,

On Mon, Sep 23, 2019 at 10:56:01AM +0200, Morten Brørup wrote:
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wiles, Keith
> > Sent: Saturday, September 21, 2019 10:29 AM
> > 
> > > On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com>
> > wrote:
> > >
> > > Many features require to store data inside the mbuf. As the room in
> > mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the
> > API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic
> > registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >  given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for
> > it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > —
> > >
> > 
> > The idea of registration for space in the mbuf I am not a big fan. I
> > did like Konstantin’s suggestion of having the compiler help with
> > optimizing the code, but with a slight difference. Maybe I
> > misunderstand, but now with this design you have to pass the offsets to
> > different parts of the application or place in global memory or have
> > each section request the offsets. It seems great if the application is
> > one big application or an appliance model application having control of
> > the whole design not so good for service chains like designs where
> > different parts of the whole application is design by different teams.
> > 
> > Konstantin’s suggest if I understand it was to use structures to allow
> > the compiler to optimize the access to the mbuf and I like that idea,
> > but with one change we add a field in the mbuf to define the mbuf
> > structure type.
> > 
> > Say 0 is the standard rte_mbuf type then type 1 could be the IPSec
> > offset type mbuf, type 2 could be something else, … The type 0 looks
> > just like the mbuf we have today with maybe the optional fields set to
> > reserved or some type of filler variables to reserve the holes in the
> > structure. Then type 1 is the IPSec mbuf and in the reserved sections
> > of the mbuf contain the IPSec related data with the standard mbuf
> > fields still matching the type 0 version.
> > 
> > This allows the mbuf to be used by the developer and the compiler now
> > knows exactly where the fields are located in the structure and does
> > not have to deal with any of the macros and offsets and registration
> > suggested here. Just cast the mbuf pointer into the new type mbuf
> > structure. We just have to make sure the code that needs to use a given
> > mbuf type has access to the structure definitions.
> > 
> > If the mbufs it going to be translated from one type mbuf to another
> > mbuf type, we just have to define that type and then cast the mbuf
> > pointer to that structure. When an mbuf is received from IPSec PMD then
> > the application needs to forward that mbuf to the next stage it can
> > reset the type to 0 or to another type filling in the reserved fields
> > to be used by the next stage in the pipeline.
> > 
> > The mbuf now contains the type and every point in the application can
> > look at the type to determine how that mbuf is defined. I am sure there
> > are some holes here, but I think it is a better solution then using all
> > of these macros, offset values and registration APIs.
> > 
> > 
> > Regards,
> > Keith
> 
>
> First of all, I applaud the idea of cleaning up the mbuf structure and
> removing the fields only rarely used and/or for special use cases only, as
> mentioned in the presentation, e.g. timestamp, timesync and seqn. It is great
> seeing serious effort put into improving the very core of DPDK!
>
> However, after some hallway discussions at DPDK Userspace and further thinking
> about the details, I can see two additional risks by introducing dynamic
> mbufs, which I would like to share for your consideration:
>
> 1. It may prevent us from adding future solutions not yet considered.
>
> If we were to introduce new functions for more granular handling of mbufs,
> similar to some of the Linux kernel's skbuff handling functions, how should
> such functions handle the dynamic fields? And how is the rte_pktmbuf_clone()
> function supposed to handle the dynamic fields? Some fields may need to be
> copied as-is, some may need to be initialized to zero or some other value, and
> so on. It is apparently not a problem now; but dynamic mbufs may prevent us
> from adding some of such functions in the future.
>
> I admit that I can only imagine the issue on an abstract level, so I can't
> give you a concrete example. Perhaps some of the more experienced Linux
> developers can provide one or debunk my concern. (Stephen: In relation to
> packet capturing we were discussing the reference counter not being respected
> by some applications, and the possible need for more granular mbuf handling.)

For now, the clone copies the fields. If we introduce a copy function, I
think it should do the same. Right now, I cannot find a use case where
the field should be set to another value, but yes, this could be a
limitation.

> 2. In the long run, we might end up adding more fields than we remove.
>
> Dynamic mbufs makes it easier to add specialized fields, which is great. But
> when the barrier to adding specialized fields is reduced, more PMDs and
> libraries may add their own unique fields, rather than going through a
> discussion and consensus for adding them to the fixed mbuf structure or
> finding some other solution. And if a multitude of PMDs each need a
> specialized field, PMDs might end up adding variants of a field, rather than
> reaching consensus for a common standard. It will be much easier to just add
> your own specialized mbuf field rather than having to go through the
> standardization process in the DPDK community.
>
> I will use the timestamp field as a theoretic example: The packet timestamp
> measurement unit differs between vendors, so with dynamic mbufs one vendor's
> PMD might create a timestamp_ns field, counting in nanoseconds, and another
> vendor's PMD might create a timestamp_clocks field, counting in clock
> counts. With the fixed mbuf, this triggered a public discussion, and a
> compromise for the field was reached.

In the mbuf structure, there is not a lot of room to describe some of
the fields, especially the ones that are inside a structure of union of
structure of union of union ;)

The definition of a dynamic field is in a dedicated structure, so there
is more room to describe it. For public dynamic fields, we have to be as
strict as for static fields, because it will be a public API. It has to
be correctly defined. About your timestamp example, we should not allow
2 different timestamp formats in the public API.

For private fields (Lib only, App only, PMD only), it's less critical
because it is not public, even if it's always good to have a clear
description.


> Although I am worried about dynamic mbufs, I don't have a better
> suggestion. And perhaps I just worry too much.

Feedback is always good to have, thanks.

Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23  8:31     ` Olivier Matz
@ 2019-09-23 11:01       ` Wang, Haiyue
  0 siblings, 0 replies; 64+ messages in thread
From: Wang, Haiyue @ 2019-09-23 11:01 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Stephen Hemminger, Andrew Rybchenko, Wiles,
	Keith, Jerin Jacob Kollanukkaran

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Monday, September 23, 2019 16:32
> To: Wang, Haiyue <haiyue.wang@intel.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Stephen Hemminger
> <stephen@networkplumber.org>; Andrew Rybchenko <arybchenko@solarflare.com>; Wiles, Keith
> <keith.wiles@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Subject: Re: [PATCH] mbuf: support dynamic fields and flags
> 
> Hi,
> 
> On Sat, Sep 21, 2019 at 04:54:39AM +0000, Wang, Haiyue wrote:
> > > -----Original Message-----
> > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > Sent: Thursday, September 19, 2019 00:55
> > > To: dev@dpdk.org
> > > Cc: Thomas Monjalon <thomas@monjalon.net>; Wang, Haiyue <haiyue.wang@intel.com>; Stephen Hemminger
> > > <stephen@networkplumber.org>; Andrew Rybchenko <arybchenko@solarflare.com>; Wiles, Keith
> > > <keith.wiles@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> > > Subject: [PATCH] mbuf: support dynamic fields and flags
> > >
> > > Many features require to store data inside the mbuf. As the room in mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > >
> > > rfc -> v1
> > >
> > > * Rebase on top of master
> > > * Change registration API to use a structure instead of
> > >   variables, getting rid of #defines (Stephen's comment)
> > > * Update flag registration to use a similar API as fields.
> > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > * Add a debug log at registration
> > > * Add some words in release note
> > > * Did some performance tests (sugg. by Andrew):
> > >   On my platform, reading a dynamic field takes ~3 cycles more
> > >   than a static field, and ~2 cycles more for writing.
> > >
> > >  app/test/test_mbuf.c                   | 114 ++++++-
> > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > >  lib/librte_mbuf/Makefile               |   2 +
> > >  lib/librte_mbuf/meson.build            |   6 +-
> > >  lib/librte_mbuf/rte_mbuf.h             |  25 +-
> > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
> > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
> > >  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
> > >  8 files changed, 724 insertions(+), 5 deletions(-)
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > >
> >
> > [snip]
> >
> > > +/**
> > > + * Helper macro to access to a dynamic field.
> > > + */
> > > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> >
> > How about to change it as: ?
> > #define RTE_MBUF_DYNFIELD(m, offset, type) ((type *)((uintptr_t)(m) + (offset)))
> >                                                   ^
> > Then,
> > 	*RTE_MBUF_DYNFIELD(mb, xxx, uint32_t) = yyy;
> >
> > Since we use 'type' like: sizeof(type), __alignof__(type), this makes 'type' be
> > more consistent, not have to force cast 'type *' when using it.
> >
> > 	const struct rte_mbuf_dynfield dynfield2 = {
> > 		.name = "test-dynfield2",
> > 		.size = sizeof(uint16_t),
> > 		.align = __alignof__(uint16_t),
> > 		.flags = 0,
> > 	};
> 
> Yes, I don't see use cases where the '*' is omitted, so it could be in the
> macro. On the other hand, doing like in the patch is more consistent with
> similar macros like rte_pktmbuf_mtod(), so I'll tend to keep it as is.
> 
> This is maybe not that important, because this macro will often be hidden
> in a wrapper, like below:
> 
>   static inline uint64_t rte_mbuf_dyn_timestamp_get(const struct rte_mbuf *m)
>   {
>          return *RTE_MBUF_DYNFIELD(m, rte_mbuf_dynfield_timestamp_offset,
>                                  uint64_t *);
>   }
> 

Thanks, yes, the same style as 'rte_pktmbuf_mtod', I didn't notice it.


> 
> Thank you for the feedback!
> 
> Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23  9:13     ` Olivier Matz
@ 2019-09-23 15:14       ` Wiles, Keith
  2019-09-23 16:16         ` Olivier Matz
  2019-09-23 16:09       ` Wiles, Keith
  1 sibling, 1 reply; 64+ messages in thread
From: Wiles, Keith @ 2019-09-23 15:14 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran



On Sep 23, 2019, at 4:13 AM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:

Hi Keith,

On Sat, Sep 21, 2019 at 08:28:32AM +0000, Wiles, Keith wrote:


On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:

Many features require to store data inside the mbuf. As the room in mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags for now.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>>
Acked-by: Thomas Monjalon <thomas@monjalon.net<mailto:thomas@monjalon.net>>
—



The idea of registration for space in the mbuf I am not a big fan. I did like
Konstantin’s suggestion of having the compiler help with optimizing the code,
but with a slight difference. Maybe I misunderstand, but now with this design
you have to pass the offsets to different parts of the application or place in
global memory or have each section request the offsets. It seems great if the
application is one big application or an appliance model application having
control of the whole design not so good for service chains like designs where
different parts of the whole application is design by different teams.

If the global variable storing the offset is defined in the mbuf layer, what
would be the problem?

Are you assuming the values are shared between primary/secondary model or between processes using shared memory? If moving the packet data via shared memory to a different application written by a different company you still have to move that metadata. If the type was carried with the mbuf we can easily convey a small type value or we would need to tell the other side we have all of this registration information to send. I would suggest the number of mbuf types will be small over time and I believe a 4 bit or 8 bit type is reasonable. In many protocols using a type value is used to convey this type of information. We can even tightly control the number of types DPDK controls and then leave some for user defined if we like.


The only things you would have to do is:

1/ ensure the offset is registered
  rte_mbuf_dyn_timestamp_register()

2/ use helpers
  rte_mbuf_dyn_timestamp_get(), rte_mbuf_dyn_timestamp_set(), ...

Konstantin’s suggest if I understand it was to use structures to allow the
compiler to optimize the access to the mbuf and I like that idea, but with one
change we add a field in the mbuf to define the mbuf structure type.

Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type
mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we
have today with maybe the optional fields set to reserved or some type of
filler variables to reserve the holes in the structure. Then type 1 is the
IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related
data with the standard mbuf fields still matching the type 0 version.

This very look like the "selective layout" in our presentation [1], page 14.

Your example talks about IPsec, but someone else will want to use a
sequence number, another one a timestamp, and another one will want to
use this space for its own application. There are a lot of use cases,
and it does not scale to have a layout for each of them. Worst, if
someone wants IPsec + a sequence number, how can it work?

One of the problem to solve is to avoid mutually exclusive feature (i.e.
union of fields that cannot be used together in the mbuf).

This allows the mbuf to be used by the developer and the compiler now knows
exactly where the fields are located in the structure and does not have to
deal with any of the macros and offsets and registration suggested here. Just
cast the mbuf pointer into the new type mbuf structure. We just have to make
sure the code that needs to use a given mbuf type has access to the structure
definitions.

With the current proposal, we can imagine an API to ask to register a
field at a specific offset. It can then be used in the application, so
that accesses are done at no cost compared to a static field, because
the offset would be const.

In the driver, the same logic could be used, but dynamically:

 if (offset == PREFERRED_OFFSET) {
   /* code with static offset */
 } else {
   /* generic code */
 }

But I'm not sure it would scale a lot if there are several features
using dynamic fields.

If the mbufs it going to be translated from one type mbuf to another mbuf
type, we just have to define that type and then cast the mbuf pointer to that
structure. When an mbuf is received from IPSec PMD then the application needs
to forward that mbuf to the next stage it can reset the type to 0 or to
another type filling in the reserved fields to be used by the next stage in
the pipeline.

What you describe is one use case.

What could be done with the API mentionned above (but I think it is
dangerous), is to allow a user to register 2 different fields at the
same offset, using a specific flag. This could work if the user knows
that these 2 fields are never used at the same time.

The mbuf now contains the type and every point in the application can look at
the type to determine how that mbuf is defined. I am sure there are some holes
here, but I think it is a better solution then using all of these macros,
offset values and registration APIs.

I'm not convinced having selective layouts is doable. The layouts cannot
fit all possible use cases, and managing the different layouts in the
driver looks difficult to me. Additionnaly, it does not solve the
problem of mutually exclusive features.

I too at one time wanted some type of allocation or registration for private mbuf space and applying to these limited fields in the mbuf header may have been reasonable. The problem is using registration and moving that information between processes is going to be hard to get right. For a single Appliance model application it would work great and not in a non-appliance model applications. The type/structure method can help and it could have problems too, but using a type/struct design seems to be one of the BKMs (Best Known Methods) in the industry.

To be honest, maybe we should just take the hit in performance and add a third cache line, as I am sure trying to squeeze metadata into these very limited fields will be a challenge IMO. I am not suggesting we add a cache line to every mbuf, only to the pools that require the extra metadata, by using the private space if that is reasonable. The applications needing a lot of metadata will just have to take the hit in performance anyway.

Having to grab a metadata value via a set of macros and inline functions seems like it will consume more cycles than just a type/structure method, as the compiler will help optimize the code without having to call any macros or inline functions.


Thanks for the feedback.
Olivier

[1] https://static.sched.com/hosted_files/dpdkbordeaux2019/2b/dpdk-201909-dyn-mbuf.pdf

Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23  9:13     ` Olivier Matz
  2019-09-23 15:14       ` Wiles, Keith
@ 2019-09-23 16:09       ` Wiles, Keith
  1 sibling, 0 replies; 64+ messages in thread
From: Wiles, Keith @ 2019-09-23 16:09 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran

Sorry, resend in plain text :-(

> On Sep 23, 2019, at 4:13 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Hi Keith,
> 
> On Sat, Sep 21, 2019 at 08:28:32AM +0000, Wiles, Keith wrote:
>> 
>> 
>>> On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com> wrote:
>>> 
>>> Many features require to store data inside the mbuf. As the room in mbuf
>>> structure is limited, it is not possible to have a field for each
>>> feature. Also, changing fields in the mbuf structure can break the API
>>> or ABI.
>>> 
>>> This commit addresses these issues, by enabling the dynamic registration
>>> of fields or flags:
>>> 
>>> - a dynamic field is a named area in the rte_mbuf structure, with a
>>> given size (>= 1 byte) and alignment constraint.
>>> - a dynamic flag is a named bit in the rte_mbuf structure.
>>> 
>>> The typical use case is a PMD that registers space for an offload
>>> feature, when the application requests to enable this feature.  As
>>> the space in mbuf is limited, the space should only be reserved if it
>>> is going to be used (i.e when the application explicitly asks for it).
>>> 
>>> The registration can be done at any moment, but it is not possible
>>> to unregister fields or flags for now.
>>> 
>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>>> Acked-by: Thomas Monjalon <thomas@monjalon.net>
>>> —
>>> 
>> 
> 
>> The idea of registration for space in the mbuf I am not a big fan. I did like
>> Konstantin’s suggestion of having the compiler help with optimizing the code,
>> but with a slight difference. Maybe I misunderstand, but now with this design
>> you have to pass the offsets to different parts of the application or place in
>> global memory or have each section request the offsets. It seems great if the
>> application is one big application or an appliance model application having
>> control of the whole design not so good for service chains like designs where
>> different parts of the whole application is design by different teams.
> 
> If the global variable storing the offset is defined in the mbuf layer, what
> would be the problem?
> 

Are you assuming the values are shared between primary/secondary model or between processes using shared memory? If moving the packet data via shared memory to a different application written by a different company you still have to move that metadata. If the type was carried with the mbuf we can easily convey a small type value or we would need to tell the other side we have all of this registration information to send. I would suggest the number of mbuf types will be small over time and I believe a 4 bit or 8 bit type is reasonable. In many protocols using a type value is used to convey this type of information. We can even tightly control the number of types DPDK controls and then leave some for user defined if we like.


> The only things you would have to do is:
> 
> 1/ ensure the offset is registered
>   rte_mbuf_dyn_timestamp_register()
> 
> 2/ use helpers
>   rte_mbuf_dyn_timestamp_get(), rte_mbuf_dyn_timestamp_set(), ...
> 
>> Konstantin’s suggest if I understand it was to use structures to allow the
>> compiler to optimize the access to the mbuf and I like that idea, but with one
>> change we add a field in the mbuf to define the mbuf structure type.
>> 
>> Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type
>> mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we
>> have today with maybe the optional fields set to reserved or some type of
>> filler variables to reserve the holes in the structure. Then type 1 is the
>> IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related
>> data with the standard mbuf fields still matching the type 0 version.
> 
> This very look like the "selective layout" in our presentation [1], page 14.
> 
> Your example talks about IPsec, but someone else will want to use a
> sequence number, another one a timestamp, and another one will want to
> use this space for its own application. There are a lot of use cases,
> and it does not scale to have a layout for each of them. Worst, if
> someone wants IPsec + a sequence number, how can it work?
> 
> One of the problem to solve is to avoid mutually exclusive feature (i.e.
> union of fields that cannot be used together in the mbuf).
> 
>> This allows the mbuf to be used by the developer and the compiler now knows
>> exactly where the fields are located in the structure and does not have to
>> deal with any of the macros and offsets and registration suggested here. Just
>> cast the mbuf pointer into the new type mbuf structure. We just have to make
>> sure the code that needs to use a given mbuf type has access to the structure
>> definitions.
> 
> With the current proposal, we can imagine an API to ask to register a
> field at a specific offset. It can then be used in the application, so
> that accesses are done at no cost compared to a static field, because
> the offset would be const.
> 
> In the driver, the same logic could be used, but dynamically:
> 
>  if (offset == PREFERRED_OFFSET) {
>    /* code with static offset */
>  } else {
>    /* generic code */
>  }
> 
> But I'm not sure it would scale a lot if there are several features
> using dynamic fields.
> 
>> If the mbufs it going to be translated from one type mbuf to another mbuf
>> type, we just have to define that type and then cast the mbuf pointer to that
>> structure. When an mbuf is received from IPSec PMD then the application needs
>> to forward that mbuf to the next stage it can reset the type to 0 or to
>> another type filling in the reserved fields to be used by the next stage in
>> the pipeline.
> 
> What you describe is one use case.
> 
> What could be done with the API mentionned above (but I think it is
> dangerous), is to allow a user to register 2 different fields at the
> same offset, using a specific flag. This could work if the user knows
> that these 2 fields are never used at the same time.
> 
>> The mbuf now contains the type and every point in the application can look at
>> the type to determine how that mbuf is defined. I am sure there are some holes
>> here, but I think it is a better solution then using all of these macros,
>> offset values and registration APIs.
> 
> I'm not convinced having selective layouts is doable. The layouts cannot
> fit all possible use cases, and managing the different layouts in the
> driver looks difficult to me. Additionnaly, it does not solve the
> problem of mutually exclusive features.
> 

I too at one time wanted some type of allocation or registration for private mbuf space and applying to these limited fields in the mbuf header may have been reasonable. The problem is using registration and moving that information between processes is going to be hard to get right. For a single Appliance model application it would work great and not in a non-appliance model applications. The type/structure method can help and it could have problems too, but using a type/struct design seems to be one of the BKMs (Best Known Methods) in the industry.

To be honest, maybe we should just take the hit in performance and add a third cache line, as I am sure trying to squeeze metadata into these very limited fields will be a challenge IMO. I am not suggesting we add a cache line to every mbuf, only to the pools that require the extra metadata, by using the private space if that is reasonable. The applications needing a lot of metadata will just have to take the hit in performance anyway.

Having to grab a metadata value via a set of macros and inline functions seems like it will consume more cycles than just a type/structure method, as the compiler will help optimize the code without having to call any macros or inline functions.

> 
> Thanks for the feedback.
> Olivier
> 
> [1] https://static.sched.com/hosted_files/dpdkbordeaux2019/2b/dpdk-201909-dyn-mbuf.pdf

Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23 15:14       ` Wiles, Keith
@ 2019-09-23 16:16         ` Olivier Matz
  2019-09-23 17:14           ` Wiles, Keith
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-09-23 16:16 UTC (permalink / raw)
  To: Wiles, Keith
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran

Hi,

(reformatted the quotes)

On Mon, Sep 23, 2019 at 03:14:51PM +0000, Wiles, Keith wrote:
> 
> 
> On Sep 23, 2019, at 4:13 AM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:
> > 
> > Hi Keith,
> > 
> > On Sat, Sep 21, 2019 at 08:28:32AM +0000, Wiles, Keith wrote:
> > > 
> > > 
> > > On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:
> > > 
> > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > structure is limited, it is not possible to have a field for each
> > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > or ABI.
> > > > 
> > > > This commit addresses these issues, by enabling the dynamic registration
> > > > of fields or flags:
> > > > 
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > 
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if it
> > > > is going to be used (i.e when the application explicitly asks for it).
> > > > 
> > > > The registration can be done at any moment, but it is not possible
> > > > to unregister fields or flags for now.
> > > > 
> > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>>
> > > > Acked-by: Thomas Monjalon <thomas@monjalon.net<mailto:thomas@monjalon.net>>
> > > —
> > > 
> > > 
> > > 
> > > The idea of registration for space in the mbuf I am not a big fan. I did like
> > > Konstantin’s suggestion of having the compiler help with optimizing the code,
> > > but with a slight difference. Maybe I misunderstand, but now with this design
> > > you have to pass the offsets to different parts of the application or place in
> > > global memory or have each section request the offsets. It seems great if the
> > > application is one big application or an appliance model application having
> > > control of the whole design not so good for service chains like designs where
> > > different parts of the whole application is design by different teams.
> > 
> > If the global variable storing the offset is defined in the mbuf layer, what
> > would be the problem?
>
> Are you assuming the values are shared between primary/secondary model or
> between processes using shared memory? If moving the packet data via shared
> memory to a different application written by a different company you still
> have to move that metadata.

The dynamic mbuf proposal works with secondary processes. What does that
change if the application is written by a different company? If you need
to store a timestamp, you register the timestamp and the offset will be
the same in primary and secondary.


> If the type was carried with the mbuf we can easily convey a small
> type value or we would need to tell the other side we have all of this
> registration information to send. I would suggest the number of mbuf
> types will be small over time and I believe a 4 bit or 8 bit type is
> reasonable. In many protocols using a type value is used to convey
> this type of information. We can even tightly control the number of
> types DPDK controls and then leave some for user defined if we like.

8 bits means 256 different mbuf layouts.
You did not reply to my previous questions:

- what happens if you need a field from layout1 and another from layout2?
  (ex: timestamp + ipsec, timestamp + seqn, seqn + ipsec, ...)
- how do you implement the rx/tx drivers functions if you have to support
  several layouts, where a field may be at a different offset?

> > > The only things you would have to do is:
> > > 
> > > 1/ ensure the offset is registered
> > >   rte_mbuf_dyn_timestamp_register()
> > > 
> > > 2/ use helpers
> > >   rte_mbuf_dyn_timestamp_get(), rte_mbuf_dyn_timestamp_set(), ...
> 
> > Konstantin’s suggest if I understand it was to use structures to allow the
> > compiler to optimize the access to the mbuf and I like that idea, but with one
> > change we add a field in the mbuf to define the mbuf structure type.
> > 
> > Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type
> > mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we
> > have today with maybe the optional fields set to reserved or some type of
> > filler variables to reserve the holes in the structure. Then type 1 is the
> > IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related
> > data with the standard mbuf fields still matching the type 0 version.
> 
> This very look like the "selective layout" in our presentation [1], page 14.
> 
> Your example talks about IPsec, but someone else will want to use a
> sequence number, another one a timestamp, and another one will want to
> use this space for its own application. There are a lot of use cases,
> and it does not scale to have a layout for each of them. Worst, if
> someone wants IPsec + a sequence number, how can it work?
> 
> One of the problem to solve is to avoid mutually exclusive feature (i.e.
> union of fields that cannot be used together in the mbuf).
> 
> This allows the mbuf to be used by the developer and the compiler now knows
> exactly where the fields are located in the structure and does not have to
> deal with any of the macros and offsets and registration suggested here. Just
> cast the mbuf pointer into the new type mbuf structure. We just have to make
> sure the code that needs to use a given mbuf type has access to the structure
> definitions.
> 
> With the current proposal, we can imagine an API to ask to register a
> field at a specific offset. It can then be used in the application, so
> that accesses are done at no cost compared to a static field, because
> the offset would be const.
> 
> In the driver, the same logic could be used, but dynamically:
> 
>  if (offset == PREFERRED_OFFSET) {
>    /* code with static offset */
>  } else {
>    /* generic code */
>  }
> 
> But I'm not sure it would scale a lot if there are several features
> using dynamic fields.
> 
> > If the mbufs it going to be translated from one type mbuf to another mbuf
> > type, we just have to define that type and then cast the mbuf pointer to that
> > structure. When an mbuf is received from IPSec PMD then the application needs
> > to forward that mbuf to the next stage it can reset the type to 0 or to
> > another type filling in the reserved fields to be used by the next stage in
> > the pipeline.
> 
> What you describe is one use case.
> 
> What could be done with the API mentionned above (but I think it is
> dangerous), is to allow a user to register 2 different fields at the
> same offset, using a specific flag. This could work if the user knows
> that these 2 fields are never used at the same time.
> 
> The mbuf now contains the type and every point in the application can look at
> the type to determine how that mbuf is defined. I am sure there are some holes
> here, but I think it is a better solution then using all of these macros,
> offset values and registration APIs.
> 
> I'm not convinced having selective layouts is doable. The layouts cannot
> fit all possible use cases, and managing the different layouts in the
> driver looks difficult to me. Additionnaly, it does not solve the
> problem of mutually exclusive features.
> 
> I too at one time wanted some type of allocation or registration for
> private mbuf space and applying to these limited fields in the mbuf
> header may have been reasonable. The problem is using registration and
> moving that information between processes is going to be hard to get
> right. For a single Appliance model application it would work great
> and not in a non-appliance model applications.

I didn't get why it wouldn't work in a non-appliance model (are you
talking about primary/secondary processes?). Can you elaborate about
what would be the problem?

> The type/structure
> method can help and it could have problems too, but using a
> type/struct design seems to be one of the BKMs (Best Known Methods) in
> the industry.

Sorry, but this is not a valid argument.

> To be honest it maybe we just take the hit in performance and add a
> third cache line as I am sure trying to squeeze metadata into these
> very limit fields will be a challenge IMO. I am not suggesting we add
> a cache line to every mbuf only to the pools that require the extra
> metadata by using the private space if that is reasonable. The
> applications needing a lot of metadata will just have to take the hit
> in performance anyway.

If an application wants to attach more data in the mbuf, we already have
the application private area. This zone is transparent from DPDK point
of view, it does not impact drivers or libs.

> Having to grab a metadata value via a set of macros and inline
> functions seems like it will consume more cycles then just a
> type/structure method as the compiler will help optimize the code
> without having to call any macros or inline functions.

Yes, I know that. This is the price to pay for solving the problems
(wasted size, exclusive features, avoid abi breakage). I answered in a
previous mail that the extra cost can be removed at application level if
we add an API to reserve a known offset. The ability to locate some
offload fields in the Rx part may also help to gain some cycles compared
to static fields.

Regards,
Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-23 16:16         ` Olivier Matz
@ 2019-09-23 17:14           ` Wiles, Keith
  0 siblings, 0 replies; 64+ messages in thread
From: Wiles, Keith @ 2019-09-23 17:14 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Jerin Jacob Kollanukkaran



> On Sep 23, 2019, at 11:16 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Hi,
> 
> (reformated the quotes)
> 
> On Mon, Sep 23, 2019 at 03:14:51PM +0000, Wiles, Keith wrote:
>> 
>> 
>> On Sep 23, 2019, at 4:13 AM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:
>>> 
>>> Hi Keith,
>>> 
>>> On Sat, Sep 21, 2019 at 08:28:32AM +0000, Wiles, Keith wrote:
>>>> 
>>>> 
>>>> On Sep 18, 2019, at 6:54 PM, Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>> wrote:
>>>> 
>>>>> Many features require to store data inside the mbuf. As the room in mbuf
>>>>> structure is limited, it is not possible to have a field for each
>>>>> feature. Also, changing fields in the mbuf structure can break the API
>>>>> or ABI.
>>>>> 
>>>>> This commit addresses these issues, by enabling the dynamic registration
>>>>> of fields or flags:
>>>>> 
>>>>> - a dynamic field is a named area in the rte_mbuf structure, with a
>>>>> given size (>= 1 byte) and alignment constraint.
>>>>> - a dynamic flag is a named bit in the rte_mbuf structure.
>>>>> 
>>>>> The typical use case is a PMD that registers space for an offload
>>>>> feature, when the application requests to enable this feature.  As
>>>>> the space in mbuf is limited, the space should only be reserved if it
>>>>> is going to be used (i.e when the application explicitly asks for it).
>>>>> 
>>>>> The registration can be done at any moment, but it is not possible
>>>>> to unregister fields or flags for now.
>>>>> 
>>>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com<mailto:olivier.matz@6wind.com>>
>>>>> Acked-by: Thomas Monjalon <thomas@monjalon.net<mailto:thomas@monjalon.net>>
>>>> —
>>>> 
>>>> 
>>>> 
>>>> The idea of registration for space in the mbuf I am not a big fan. I did like
>>>> Konstantin’s suggestion of having the compiler help with optimizing the code,
>>>> but with a slight difference. Maybe I misunderstand, but now with this design
>>>> you have to pass the offsets to different parts of the application or place in
>>>> global memory or have each section request the offsets. It seems great if the
>>>> application is one big application or an appliance model application having
>>>> control of the whole design not so good for service chains like designs where
>>>> different parts of the whole application is design by different teams.
>>> 
>>> If the global variable storing the offset is defined in the mbuf layer, what
>>> would be the problem?
>> 
>> Are you assuming the values are shared between primary/secondary model or
>> between processes using shared memory? If moving the packet data via shared
>> memory to a different application written by a different company you still
>> have to move that metadata.
> 
> The dynamic mbuf proposal works with secondary processes. What does that
> change if the application is written by a different company? If you need
> to store a timestamp, you register the timestamp and the offset will be
> the same in primary and secondary.
> 
> 
>> If the type was carried with the mbuf we can easily convey a small
>> type value or we would need to tell the other side we have all of this
>> registration information to send. I would suggest the number of mbuf
>> types will be small over time and I believe a 4 bit or 8 bit type is
>> reasonable. In many protocols using a type value is used to convey
>> this type of information. We can even tightly control the number of
>> types DPDK controls and then leave some for user defined if we like.
> 
> 8 bits means 256 different mbuf layouts.

I also stated 4 bits. It is a problem with using types that we have to allow a number of them, but the smart thing is that we restrict DPDK's own uses to only a few. The developers of other applications using DPDK can use any number they need.
> You did not replied to my previous questions:

Sorry I did not see a question other than a statement wrapped in a question.

The reply to your question about storing the offsets in the mbuf layer is that it is just extra data and APIs we have to test. The mbuf pool would have to carry this information, or provide some way to associate the metadata of the metadata with the given mbuf. My point is that the type is carried with the mbuf, and then we have no question as to the type of metadata contained in the mbuf. Having metadata for the metadata for the application to grab, or using even more macros or inlines, is not going to make it easier for the developer, only more complex. A type field will tell you exactly what the metadata is and where it is located in the mbuf header.
> 
> - what happens if you need a field from layout1 and another from layout2?
>  (ex: timestamp + ipsec, timestamp + seqn, seqn + ipsec, …)

At this point you need to be smart and use a single type instead of trying to merge two or three types or use cases. The type defines the valid fields; changing the fields from one type to another within a single mbuf instance or packet instance will not happen in the cases you defined above. Multiple mbufs from, say, a PMD or application are not going to need to combine all of the above examples.

> - how do you implement the rx/tx drivers functions if you have to support
>  several layouts, where a field may be at a different offset?
> 
>>>> The only things you would have to do is:
>>>> 
>>>> 1/ ensure the offset is registered
>>>>  rte_mbuf_dyn_timestamp_register()
>>>> 
>>>> 2/ use helpers
>>>>  rte_mbuf_dyn_timestamp_get(), rte_mbuf_dyn_timestamp_set(), ...
>> 
>>> Konstantin’s suggest if I understand it was to use structures to allow the
>>> compiler to optimize the access to the mbuf and I like that idea, but with one
>>> change we add a field in the mbuf to define the mbuf structure type.
>>> 
>>> Say 0 is the standard rte_mbuf type then type 1 could be the IPSec offset type
>>> mbuf, type 2 could be something else, … The type 0 looks just like the mbuf we
>>> have today with maybe the optional fields set to reserved or some type of
>>> filler variables to reserve the holes in the structure. Then type 1 is the
>>> IPSec mbuf and in the reserved sections of the mbuf contain the IPSec related
>>> data with the standard mbuf fields still matching the type 0 version.
>> 
>> This very look like the "selective layout" in our presentation [1], page 14.
>> 
>> Your example talks about IPsec, but someone else will want to use a
>> sequence number, another one a timestamp, and another one will want to
>> use this space for its own application. There are a lot of use cases,
>> and it does not scale to have a layout for each of them. Worst, if
>> someone wants IPsec + a sequence number, how can it work?
>> 
>> One of the problem to solve is to avoid mutually exclusive feature (i.e.
>> union of fields that cannot be used together in the mbuf).
>> 
>> This allows the mbuf to be used by the developer and the compiler now knows
>> exactly where the fields are located in the structure and does not have to
>> deal with any of the macros and offsets and registration suggested here. Just
>> cast the mbuf pointer into the new type mbuf structure. We just have to make
>> sure the code that needs to use a given mbuf type has access to the structure
>> definitions.
>> 
>> With the current proposal, we can imagine an API to ask to register a
>> field at a specific offset. It can then be used in the application, so
>> that accesses are done at no cost compared to a static field, because
>> the offset would be const.
>> 
>> In the driver, the same logic could be used, but dynamically:
>> 
>> if (offset == PREFERRED_OFFSET) {
>>   /* code with static offset */
>> } else {
>>   /* generic code */
>> }
>> 
>> But I'm not sure it would scale a lot if there are several features
>> using dynamic fields.
>> 
>>> If the mbufs it going to be translated from one type mbuf to another mbuf
>>> type, we just have to define that type and then cast the mbuf pointer to that
>>> structure. When an mbuf is received from IPSec PMD then the application needs
>>> to forward that mbuf to the next stage it can reset the type to 0 or to
>>> another type filling in the reserved fields to be used by the next stage in
>>> the pipeline.
>> 
>> What you describe is one use case.
>> 
>> What could be done with the API mentionned above (but I think it is
>> dangerous), is to allow a user to register 2 different fields at the
>> same offset, using a specific flag. This could work if the user knows
>> that these 2 fields are never used at the same time.
>> 
>> The mbuf now contains the type and every point in the application can look at
>> the type to determine how that mbuf is defined. I am sure there are some holes
>> here, but I think it is a better solution then using all of these macros,
>> offset values and registration APIs.
>> 
>> I'm not convinced having selective layouts is doable. The layouts cannot
>> fit all possible use cases, and managing the different layouts in the
>> driver looks difficult to me. Additionnaly, it does not solve the
>> problem of mutually exclusive features.
>> 
>> I too at one time wanted some type of allocation or registration for
>> private mbuf space and applying to these limited fields in the mbuf
>> header may have been reasonable. The problem is using registration and
>> moving that information between processes is going to be hard to get
>> right. For a single Appliance model application it would work great
>> and not in a non-appliance model applications.
> 
> I didn't get why it wouldn't work in a non-appliance model (are you
> talking about primary/secondary processes?). Can you elaborate about
> waht would be the problem?

Let's look at something similar to VPP, with nodes in a graph where each node needs to verify that the metadata exists in that packet (extra macro/inline calls). This would be needed for every packet processed unless something else makes sure only a specific mbuf type is used. Why not just use a type field, cast the mbuf into the correct type structure, and reject any mbufs that do not match the types that can be handled by this node? The type field check would be a simple switch or if/else construct.
> 
>> The type/structure
>> method can help and it could have problems too, but using a
>> type/struct design seems to be one of the BKMs (Best Known Methods) in
>> the industry.
> 
> Sorry, but this is not a valid argument.

Your assumption is not valid IMO. Sorry.

>> To be honest it maybe we just take the hit in performance and add a
>> third cache line as I am sure trying to squeeze metadata into these
>> very limit fields will be a challenge IMO. I am not suggesting we add
>> a cache line to every mbuf only to the pools that require the extra
>> metadata by using the private space if that is reasonable. The
>> applications needing a lot of metadata will just have to take the hit
>> in performance anyway.
> 
> If an application wants to attach more data in the mbuf, we already have
> the application private area. This zone is transparent from DPDK point
> of view, it does not impact drivers or libs.

We still need all parts of the system which access the metadata to know that the private data exists and what its format is. You solved this with offsets and registration to define the format. My suggestion is similar, as it defines the location and type of metadata in a type/struct format. I feel it is easier to understand and to process in a high-performance way.

> 
>> Having to grab a metadata value via a set of macros and inline
>> functions seems like it will consume more cycles then just a
>> type/structure method as the compiler will help optimize the code
>> without having to call any macros or inline functions.
> 
> Yes, I know that. This is the price to pay for solving the problems
> (wasted size, exclusive features, avoid abi breakage). I answered in a
> previous mail that the extra cost can be removed at application level if
> we add an API to reserve a known offset. The ability to locate some
> offload fields in the Rx part may also help to gain some cycles compared
> to static fields.

This may be true, but I do not see how the cycles can be removed unless you create a structure layout in the application to access the metadata. Just saying it can be solved is OK, but proving it can be solved is the real question.

Anyway, I am not going to argue with you; the community can decide if your solution solves the problem. In my case, I do not see it solving it in the best way, and my suggestion may not be the best either.
> 
> Regards,
> Olivier

Regards,
Keith


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
  2019-09-21  4:54   ` Wang, Haiyue
  2019-09-21  8:28   ` Wiles, Keith
@ 2019-10-01 10:49   ` Ananyev, Konstantin
  2019-10-17  7:54     ` Olivier Matz
  2 siblings, 1 reply; 64+ messages in thread
From: Ananyev, Konstantin @ 2019-10-01 10:49 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Wiles, Keith, Jerin Jacob Kollanukkaran

Hi Olivier,

> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.

Looks ok to me in general.
Some comments/suggestions inline.
Konstantin

> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 114 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  25 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
>  8 files changed, 724 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> 
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -198,9 +198,12 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> 
> -/* add new RX flags here */
> +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -/* add new TX flags here */
> +#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_LAST_FREE (1ULL << 39)
> +
> +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> 
>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,8 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
> 
> +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> +	uint64_t dynfield2; /**< Reserved for dynamic fields. */

I wonder why not just one field:
	union {
		uint8_t u8[16];
		...
		uint64_t u64[2];
	} dyn_field1;
?
It would probably be a bit handier to refer to, register, etc., no?

>  } __rte_cache_aligned;
> 
>  /**
> @@ -1684,6 +1689,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>   */
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> 
> +/**
> + * Copy dynamic fields from m_src to m_dst.
> + *
> + * @param m_dst
> + *   The destination mbuf.
> + * @param m_src
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
> +{
> +	m_dst->dynfield1 = m_src->dynfield1;
> +	m_dst->dynfield2 = m_src->dynfield2;
> +}
> +
>  /**
>   * Attach packet mbuf to another packet mbuf.
>   *
> @@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
>  	mi->vlan_tci_outer = m->vlan_tci_outer;
>  	mi->tx_offload = m->tx_offload;
>  	mi->hash = m->hash;
> +	rte_mbuf_dynfield_copy(mi, m);
> 
>  	mi->next = NULL;
>  	mi->pkt_len = mi->data_len;
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> new file mode 100644
> index 000000000..13b8742d0
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -0,0 +1,408 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_tailq.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_string_fns.h>
> +#include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
> +
> +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> +
> +struct mbuf_dynfield_elt {
> +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> +	struct rte_mbuf_dynfield params;
> +	int offset;

Why not 'size_t offset', to avoid any explicit conversions, etc?

> +};
> +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> +	.name = "RTE_MBUF_DYNFIELD",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> +
> +struct mbuf_dynflag_elt {
> +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> +	struct rte_mbuf_dynflag params;
> +	int bitnum;
> +};
> +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> +	.name = "RTE_MBUF_DYNFLAG",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> +
> +struct mbuf_dyn_shm {
> +	/** For each mbuf byte, free_space[i] == 1 if space is free. */
> +	uint8_t free_space[sizeof(struct rte_mbuf)];
> +	/** Bitfield of available flags. */
> +	uint64_t free_flags;
> +};
> +static struct mbuf_dyn_shm *shm;
> +
> +/* allocate and initialize the shared memory */
> +static int
> +init_shared_mem(void)
> +{
> +	const struct rte_memzone *mz;
> +	uint64_t mask;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> +						sizeof(struct mbuf_dyn_shm),
> +						SOCKET_ID_ANY, 0,
> +						RTE_CACHE_LINE_SIZE);
> +	} else {
> +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> +	}
> +	if (mz == NULL)
> +		return -1;
> +
> +	shm = mz->addr;
> +
> +#define mark_free(field)						\
> +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> +		0xff, sizeof(((struct rte_mbuf *)0)->field))

I think you can avoid defining/undefining macros here with something like this:

static const struct {
      size_t offset;
      size_t size;
} dyn_syms[] = {
    [0] = {.offset = offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
    [1] = {.offset = offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
};
...

for (i = 0; i != RTE_DIM(dyn_syms); i++)
    memset(shm->free_space + dym_syms[i].offset, UINT8_MAX, dym_syms[i].size);

> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		/* init free_space, keep it sync'd with
> +		 * rte_mbuf_dynfield_copy().
> +		 */
> +		memset(shm, 0, sizeof(*shm));
> +		mark_free(dynfield1);
> +		mark_free(dynfield2);
> +
> +		/* init free_flags */
> +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> +			shm->free_flags |= mask;
> +	}
> +#undef mark_free
> +
> +	return 0;
> +}
> +
> +/* check if this offset can be used */
> +static int
> +check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
> +{
> +	size_t i;
> +
> +	(void)flags;


We have RTE_SET_USED() for such cases...
Though as it is an internal function probably better not to introduce
unused parameters at all.

> +
> +	if ((offset & (align - 1)) != 0)
> +		return -1;
> +	if (offset + size > sizeof(struct rte_mbuf))
> +		return -1;
> +
> +	for (i = 0; i < size; i++) {
> +		if (!shm->free_space[i + offset])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynfield_elt *
> +__mbuf_dynfield_lookup(const char *name)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynfield;
> +}
> +
> +int
> +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> +{
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynfield == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> +
> +	return mbuf_dynfield->offset;
> +}
> +
> +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> +		const struct rte_mbuf_dynfield *params2)
> +{
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->size != params2->size)
> +		return -1;
> +	if (params1->align != params2->align)
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +int
> +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)

What I meant for user space: if we could also have another function that would allow
the user to specify the required offset for a dynfield explicitly, then the user could define it as a constant
value and let the compiler do the optimization work and hopefully generate faster code to access
this field.
Something like this:

int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params, size_t offset);

#define RTE_MBUF_DYNFIELD_OFFSET(fld, off)  (offsetof(struct rte_mbuf, fld) + (off))

And then somewhere in user code:

/* let's say, to reserve the first 4B in dynfield1 */
#define MBUF_DYNFIELD_A	RTE_MBUF_DYNFIELD_OFFSET(dynfield1, 0)
...
params.name = RTE_STR(MBUF_DYNFIELD_A);
params.size = sizeof(uint32_t);
params.align = sizeof(uint32_t);
ret = rte_mbuf_dynfield_register_offset(&params, MBUF_DYNFIELD_A);
if (ret != MBUF_DYNFIELD_A)  {
     /* handle it somehow, probably just terminate gracefully... */
}
...

/* let's say, to reserve the last 2B in dynfield2 */
#define MBUF_DYNFIELD_B	RTE_MBUF_DYNFIELD_OFFSET(dynfield2, 6)
...
params.name = RTE_STR(MBUF_DYNFIELD_B);
params.size = sizeof(uint16_t);
params.align = sizeof(uint16_t);
ret = rte_mbuf_dynfield_register_offset(&params, MBUF_DYNFIELD_B);

After that user can use constant offsets MBUF_DYNFIELD_A/ MBUF_DYNFIELD_B
to access these fields.
Same thoughts for DYNFLAG.

> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	int offset, ret;

size_t offset
to avoid explicit conversions, etc.?

> +	size_t i;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		goto fail;

As I understand it, here you allocate/initialize your shm without any lock protection,
though later you protect it via rte_mcfg_tailq_write_lock().
That seems a bit flaky to me.
Why not store the information about free dynfield bytes inside mbuf_dynfield_tailq?
Let's say at init() you create and add an entry to that list with some reserved name.
Then at register time, grab mcfg_tailq_write_lock, do a lookup
for that entry, and then read/update it as needed.
It would help to avoid the race, plus you wouldn't need to
allocate/look up a memzone.


> +	if (params->size >= sizeof(struct rte_mbuf)) {
> +		rte_errno = EINVAL;
> +		goto fail;
> +	}
> +	if (!rte_is_power_of_2(params->align)) {
> +		rte_errno = EINVAL;
> +		goto fail;
> +	}
> +	if (params->flags != 0) {
> +		rte_errno = EINVAL;
> +		goto fail;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +

I think it would probably be cleaner and easier to read/maintain if you put the actual
code under lock protection into a separate function - as you did for __mbuf_dynfield_lookup().

> +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> +	if (mbuf_dynfield != NULL) {
> +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
> +			rte_errno = EEXIST;
> +			goto fail_unlock;
> +		}
> +		offset = mbuf_dynfield->offset;
> +		goto out_unlock;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		goto fail_unlock;
> +	}
> +
> +	for (offset = 0;
> +	     offset < (int)sizeof(struct rte_mbuf);
> +	     offset++) {
> +		if (check_offset(offset, params->size, params->align,
> +					params->flags) == 0)
> +			break;
> +	}
> +
> +	if (offset == sizeof(struct rte_mbuf)) {
> +		rte_errno = ENOENT;
> +		goto fail_unlock;
> +	}
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		goto fail_unlock;
> +
> +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
> +	if (mbuf_dynfield == NULL)
> +		goto fail_unlock;
> +
> +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> +		sizeof(mbuf_dynfield->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> +		rte_errno = ENAMETOOLONG;
> +		goto fail_unlock;
> +	}
> +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
> +	mbuf_dynfield->offset = offset;
> +	te->data = mbuf_dynfield;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> +
> +	for (i = offset; i < offset + params->size; i++)
> +		shm->free_space[i] = 0;
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %d\n",
> +		params->name, params->size, params->align, params->flags,
> +		offset);
> +
> +out_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return offset;
> +
> +fail_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +fail:
> +	rte_free(mbuf_dynfield);
> +	rte_free(te);
> +	return -1;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynflag_elt *
> +__mbuf_dynflag_lookup(const char *name)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		if (strncmp(name, mbuf_dynflag->params.name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynflag;
> +}
> +
> +int
> +rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params)
> +{
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynflag == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> +
> +	return mbuf_dynflag->bitnum;
> +}
> +
> +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> +		const struct rte_mbuf_dynflag *params2)
> +{
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +int
> +rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	int bitnum, ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		goto fail;
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> +	if (mbuf_dynflag != NULL) {
> +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
> +			rte_errno = EEXIST;
> +			goto fail_unlock;
> +		}
> +		bitnum = mbuf_dynflag->bitnum;
> +		goto out_unlock;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		goto fail_unlock;
> +	}
> +
> +	if (shm->free_flags == 0) {
> +		rte_errno = ENOENT;
> +		goto fail_unlock;
> +	}
> +	bitnum = rte_bsf64(shm->free_flags);
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		goto fail_unlock;
> +
> +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
> +	if (mbuf_dynflag == NULL)
> +		goto fail_unlock;
> +
> +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> +		sizeof(mbuf_dynflag->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> +		rte_errno = ENAMETOOLONG;
> +		goto fail_unlock;
> +	}
> +	mbuf_dynflag->bitnum = bitnum;
> +	te->data = mbuf_dynflag;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> +
> +	shm->free_flags &= ~(1ULL << bitnum);
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
> +		params->name, params->flags, bitnum);
> +
> +out_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return bitnum;
> +
> +fail_unlock:
> +	rte_mcfg_tailq_write_unlock();
> +fail:
> +	rte_free(mbuf_dynflag);
> +	rte_free(te);
> +	return -1;
> +}
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
> new file mode 100644
> index 000000000..6e2c81654
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> @@ -0,0 +1,163 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#ifndef _RTE_MBUF_DYN_H_
> +#define _RTE_MBUF_DYN_H_
> +
> +/**
> + * @file
> + * RTE Mbuf dynamic fields and flags
> + *
> + * Many features require to store data inside the mbuf. As the room in
> + * mbuf structure is limited, it is not possible to have a field for
> + * each feature. Also, changing fields in the mbuf structure can break
> + * the API or ABI.
> + *
> + * This module addresses this issue, by enabling the dynamic
> + * registration of fields or flags:
> + *
> + * - a dynamic field is a named area in the rte_mbuf structure, with a
> + *   given size (>= 1 byte) and alignment constraint.
> + * - a dynamic flag is a named bit in the rte_mbuf structure, stored
> + *   in mbuf->ol_flags.
> + *
> + * The typical use case is when a specific offload feature requires to
> + * register a dedicated offload field in the mbuf structure, and adding
> + * a static field or flag is not justified.
> + *
> + * Example of use:
> + *
> + * - A rte_mbuf_dynfield structure is defined, containing the parameters
> + *   of the dynamic field to be registered:
> + *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
> + * - The application initializes the PMD, and asks for this feature
> + *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
> + *   rxconf. This will make the PMD to register the field by calling
> + *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
> + *   stores the returned offset.
> + * - The application that uses the offload feature also registers
> + *   the field to retrieve the same offset.
> + * - When the PMD receives a packet, it can set the field:
> + *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
> + * - In the main loop, the application can retrieve the value with
> + *   the same macro.
> + *
> + * To avoid wasting space, the dynamic fields or flags must only be
> + * reserved on demand, when an application asks for the related feature.
> + *
> + * The registration can be done at any moment, but it is not possible
> + * to unregister fields or flags for now.
> + *
> + * A dynamic field can be reserved and used by an application only.
> + * It can for instance be a packet mark.
> + */
> +
> +#include <sys/types.h>
> +/**
> + * Maximum length of the dynamic field or flag string.
> + */
> +#define RTE_MBUF_DYN_NAMESIZE 64
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic field.
> + */
> +struct rte_mbuf_dynfield {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
> +	size_t size;        /**< The number of bytes to reserve. */
> +	size_t align;       /**< The alignment constraint (power of 2). */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */
> +};
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic flag.
> + */
> +struct rte_mbuf_dynflag {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */
> +};
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure.
> + *
> + * If the field is already registered (same name and parameters), its
> + * offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters (name, size,
> + *   alignment constraint and flags).
> + * @return
> + *   The offset in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already register with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: not enough room in mbuf.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name does not ends with \0.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Lookup for a registered dynamic mbuf field.
> + *
> + * @param name
> + *   A string identifying the dynamic field.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic field.
> + * @return
> + *   The offset of this field in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic field matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_lookup(const char *name,
> +			struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure.
> + *
> + * If the flag is already registered (same name and parameters), its
> + * offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters of the dynamic
> + *   flag (name and options).
> + * @return
> + *   The number of the reserved bit, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already register with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: no more flag available.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Lookup for a registered dynamic mbuf flag.
> + *
> + * @param name
> + *   A string identifying the dynamic flag.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic flag.
> + * @return
> + *   The offset of this flag in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic flag matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Helper macro to access to a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> +
> +#endif
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
> index 2662a37bf..a98310570 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -50,4 +50,8 @@ EXPERIMENTAL {
>  	global:
> 
>  	rte_mbuf_check;
> +	rte_mbuf_dynfield_lookup;
> +	rte_mbuf_dynfield_register;
> +	rte_mbuf_dynflag_lookup;
> +	rte_mbuf_dynflag_register;
>  } DPDK_18.08;
> --
> 2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-10-01 10:49   ` Ananyev, Konstantin
@ 2019-10-17  7:54     ` Olivier Matz
  2019-10-17 11:58       ` Ananyev, Konstantin
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-17  7:54 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Wiles, Keith, Jerin Jacob Kollanukkaran

Hi Konstantin,

Thanks for the feedback. Please see my answers below.

On Tue, Oct 01, 2019 at 10:49:39AM +0000, Ananyev, Konstantin wrote:
> Hi Olivier,
> 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> 
> Looks ok to me in general.
> Some comments/suggestions inline.
> Konstantin
> 
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > 
> > rfc -> v1
> > 
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> > 
> >  app/test/test_mbuf.c                   | 114 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  25 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
> >  8 files changed, 724 insertions(+), 5 deletions(-)
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > 
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -198,9 +198,12 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > 
> > -/* add new RX flags here */
> > +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > 
> > -/* add new TX flags here */
> > +#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_LAST_FREE (1ULL << 39)
> > +
> > +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > 
> >  /**
> >   * Indicate that the metadata field in the mbuf is in use.
> > @@ -738,6 +741,8 @@ struct rte_mbuf {
> >  	 */
> >  	struct rte_mbuf_ext_shared_info *shinfo;
> > 
> > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> 
> Wonder why just not one field:
> 	union {
> 		uint8_t u8[16];
> 		...
> 		uint64_t u64[2];
> 	} dyn_field1;
> ?
> Probably would be a bit handy, to refer, register, etc. no?

I didn't find any place where we need an access through u8, so I
just changed it into uint64_t dynfield1[2].


> 
> >  } __rte_cache_aligned;
> > 
> >  /**
> > @@ -1684,6 +1689,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
> >   */
> >  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> > 
> > +/**
> > + * Copy dynamic fields from m_src to m_dst.
> > + *
> > + * @param m_dst
> > + *   The destination mbuf.
> > + * @param m_src
> > + *   The source mbuf.
> > + */
> > +static inline void
> > +rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
> > +{
> > +	m_dst->dynfield1 = m_src->dynfield1;
> > +	m_dst->dynfield2 = m_src->dynfield2;
> > +}
> > +
> >  /**
> >   * Attach packet mbuf to another packet mbuf.
> >   *
> > @@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
> >  	mi->vlan_tci_outer = m->vlan_tci_outer;
> >  	mi->tx_offload = m->tx_offload;
> >  	mi->hash = m->hash;
> > +	rte_mbuf_dynfield_copy(mi, m);
> > 
> >  	mi->next = NULL;
> >  	mi->pkt_len = mi->data_len;
> > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> > new file mode 100644
> > index 000000000..13b8742d0
> > --- /dev/null
> > +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> > @@ -0,0 +1,408 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + */
> > +
> > +#include <sys/queue.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_eal.h>
> > +#include <rte_eal_memconfig.h>
> > +#include <rte_tailq.h>
> > +#include <rte_errno.h>
> > +#include <rte_malloc.h>
> > +#include <rte_string_fns.h>
> > +#include <rte_mbuf.h>
> > +#include <rte_mbuf_dyn.h>
> > +
> > +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> > +
> > +struct mbuf_dynfield_elt {
> > +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> > +	struct rte_mbuf_dynfield params;
> > +	int offset;
> 
> Why not 'size_t offset', to avoid any explicit conversions, etc?

Fixed


> > +};
> > +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> > +	.name = "RTE_MBUF_DYNFIELD",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> > +
> > +struct mbuf_dynflag_elt {
> > +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> > +	struct rte_mbuf_dynflag params;
> > +	int bitnum;
> > +};
> > +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> > +	.name = "RTE_MBUF_DYNFLAG",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> > +
> > +struct mbuf_dyn_shm {
> > +	/** For each mbuf byte, free_space[i] == 1 if space is free. */
> > +	uint8_t free_space[sizeof(struct rte_mbuf)];
> > +	/** Bitfield of available flags. */
> > +	uint64_t free_flags;
> > +};
> > +static struct mbuf_dyn_shm *shm;
> > +
> > +/* allocate and initialize the shared memory */
> > +static int
> > +init_shared_mem(void)
> > +{
> > +	const struct rte_memzone *mz;
> > +	uint64_t mask;
> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > +						sizeof(struct mbuf_dyn_shm),
> > +						SOCKET_ID_ANY, 0,
> > +						RTE_CACHE_LINE_SIZE);
> > +	} else {
> > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > +	}
> > +	if (mz == NULL)
> > +		return -1;
> > +
> > +	shm = mz->addr;
> > +
> > +#define mark_free(field)						\
> > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > +		0xff, sizeof(((struct rte_mbuf *)0)->field))
> 
> I think you can avoid defining/unedifying macros here by something like that:
> 
> static const struct {
>       size_t offset;
>       size_t size;
> } dyn_syms[] = {
>     [0] = {.offset = offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
>     [1] = {.offset = offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
> };
> ...
> 
> for (i = 0; i != RTE_DIM(dyn_syms); i++)
>     memset(shm->free_space + dym_syms[i].offset, UINT8_MAX, dym_syms[i].size);
> 

I tried it, but the following lines are too long
     [0] = {offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
     [1] = {offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
To make them shorter, we can use a macro... but... wait :)

> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		/* init free_space, keep it sync'd with
> > +		 * rte_mbuf_dynfield_copy().
> > +		 */
> > +		memset(shm, 0, sizeof(*shm));
> > +		mark_free(dynfield1);
> > +		mark_free(dynfield2);
> > +
> > +		/* init free_flags */
> > +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> > +			shm->free_flags |= mask;
> > +	}
> > +#undef mark_free
> > +
> > +	return 0;
> > +}
> > +
> > +/* check if this offset can be used */
> > +static int
> > +check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
> > +{
> > +	size_t i;
> > +
> > +	(void)flags;
> 
> 
> We have RTE_SET_USED() for such cases...
> Though as it is an internal function probably better not to introduce
> unused parameters at all.

I removed the flag parameter as you suggested.


> > +
> > +	if ((offset & (align - 1)) != 0)
> > +		return -1;
> > +	if (offset + size > sizeof(struct rte_mbuf))
> > +		return -1;
> > +
> > +	for (i = 0; i < size; i++) {
> > +		if (!shm->free_space[i + offset])
> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynfield_elt *
> > +__mbuf_dynfield_lookup(const char *name)
> > +{
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> > +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynfield;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> > +{
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynfield == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> > +
> > +	return mbuf_dynfield->offset;
> > +}
> > +
> > +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> > +		const struct rte_mbuf_dynfield *params2)
> > +{
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->size != params2->size)
> > +		return -1;
> > +	if (params1->align != params2->align)
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
> 
> What I meant at user-space - if we can also have another function that would allow
> user to specify required offset for dynfield explicitly, then user can define it as constant
> value and let compiler do optimization work and hopefully generate faster code to access
> this field.
> Something like that:
> 
> int rte_mbuf_dynfiled_register_offset(const struct rte_mbuf_dynfield *params, size_t offset);
> 
> #define RTE_MBUF_DYNFIELD_OFFSET(fld, off)  (offsetof(struct rte_mbuf, fld) + (off))
> 
> And then somewhere in user code:
> 
> /* to let say reserve first 4B in dynfield1*/
> #define MBUF_DYNFIELD_A	RTE_MBUF_DYNFIELD_OFFSET(dynfiled1, 0)
> ...
> params.name = RTE_STR(MBUF_DYNFIELD_A);
> params.size = sizeof(uint32_t);
> params.align = sizeof(uint32_t);
> ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_A);
> if (ret != MBUF_DYNFIELD_A)  {
>      /* handle it somehow, probably just terminate gracefully... */
> }
> ...
> 
> /* to let say reserve last 2B in dynfield2*/
> #define MBUF_DYNFIELD_B	RTE_MBUF_DYNFIELD_OFFSET(dynfiled2, 6)
> ...
> params.name = RTE_STR(MBUF_DYNFIELD_B);
> params.size = sizeof(uint16_t);
> params.align = sizeof(uint16_t);
> ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_B);
> 
> After that user can use constant offsets MBUF_DYNFIELD_A/ MBUF_DYNFIELD_B
> to access these fields.
> Same thoughts for DYNFLAG.

I added the feature in v2.


> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	int offset, ret;
> 
> size_t offset
> to avoid explicit conversions, etc.?
> 

Fixed.


> > +	size_t i;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		goto fail;
> 
> As I understand, here you allocate/initialize your shm without any lock protection,
> though later you protect it via  rte_mcfg_tailq_write_lock().
> That seems a bit flakey to me.
> Why not to store information about free dynfield bytes inside mbuf_dynfield_tailq?
> Let say  at init() create and add an entry into that list with some reserved name.
> Then at register - grab mcfg_tailq_write_lock and do lookup
> for such entry and then read/update it as needed.
> It would help to avoid racing problem, plus you wouldn't need to
> allocate/lookup for memzone.  

I don't quite like the idea of having a special entry with a different type
in an element list. Although it is simpler from a locking perspective, it is
less obvious for the developer.

Also, I changed the way a zone is reserved to return the one that has the
least impact on the next reservation, and I feel it is easier to implement with
the shared memory.

So I just moved the init_shared_mem() call inside the section protected by
rte_mcfg_tailq_write_lock(); that should do the job.


> > +	if (params->size >= sizeof(struct rte_mbuf)) {
> > +		rte_errno = EINVAL;
> > +		goto fail;
> > +	}
> > +	if (!rte_is_power_of_2(params->align)) {
> > +		rte_errno = EINVAL;
> > +		goto fail;
> > +	}
> > +	if (params->flags != 0) {
> > +		rte_errno = EINVAL;
> > +		goto fail;
> > +	}
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +
> 
> I think it probably would be cleaner and easier to read/maintain, if you'll put actual
> code under lock protection into a separate function - as you did for __mbuf_dynfield_lookup().

Yes, I did that, it should be clearer now.


> > +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> > +	if (mbuf_dynfield != NULL) {
> > +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
> > +			rte_errno = EEXIST;
> > +			goto fail_unlock;
> > +		}
> > +		offset = mbuf_dynfield->offset;
> > +		goto out_unlock;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		goto fail_unlock;
> > +	}
> > +
> > +	for (offset = 0;
> > +	     offset < (int)sizeof(struct rte_mbuf);
> > +	     offset++) {
> > +		if (check_offset(offset, params->size, params->align,
> > +					params->flags) == 0)
> > +			break;
> > +	}
> > +
> > +	if (offset == sizeof(struct rte_mbuf)) {
> > +		rte_errno = ENOENT;
> > +		goto fail_unlock;
> > +	}
> > +
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		goto fail_unlock;
> > +
> > +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
> > +	if (mbuf_dynfield == NULL)
> > +		goto fail_unlock;
> > +
> > +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> > +		sizeof(mbuf_dynfield->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> > +		rte_errno = ENAMETOOLONG;
> > +		goto fail_unlock;
> > +	}
> > +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
> > +	mbuf_dynfield->offset = offset;
> > +	te->data = mbuf_dynfield;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> > +
> > +	for (i = offset; i < offset + params->size; i++)
> > +		shm->free_space[i] = 0;
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %d\n",
> > +		params->name, params->size, params->align, params->flags,
> > +		offset);
> > +
> > +out_unlock:
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return offset;
> > +
> > +fail_unlock:
> > +	rte_mcfg_tailq_write_unlock();
> > +fail:
> > +	rte_free(mbuf_dynfield);
> > +	rte_free(te);
> > +	return -1;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynflag_elt *
> > +__mbuf_dynflag_lookup(const char *name)
> > +{
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> > +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> > +		if (strncmp(name, mbuf_dynflag->params.name,
> > +				RTE_MBUF_DYN_NAMESIZE) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynflag;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_lookup(const char *name,
> > +			struct rte_mbuf_dynflag *params)
> > +{
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynflag == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> > +
> > +	return mbuf_dynflag->bitnum;
> > +}
> > +
> > +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> > +		const struct rte_mbuf_dynflag *params2)
> > +{
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
> > +{
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	int bitnum, ret;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		goto fail;
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> > +	if (mbuf_dynflag != NULL) {
> > +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
> > +			rte_errno = EEXIST;
> > +			goto fail_unlock;
> > +		}
> > +		bitnum = mbuf_dynflag->bitnum;
> > +		goto out_unlock;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		goto fail_unlock;
> > +	}
> > +
> > +	if (shm->free_flags == 0) {
> > +		rte_errno = ENOENT;
> > +		goto fail_unlock;
> > +	}
> > +	bitnum = rte_bsf64(shm->free_flags);
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		goto fail_unlock;
> > +
> > +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
> > +	if (mbuf_dynflag == NULL)
> > +		goto fail_unlock;
> > +
> > +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> > +		sizeof(mbuf_dynflag->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> > +		rte_errno = ENAMETOOLONG;
> > +		goto fail_unlock;
> > +	}
> > +	mbuf_dynflag->bitnum = bitnum;
> > +	te->data = mbuf_dynflag;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> > +
> > +	shm->free_flags &= ~(1ULL << bitnum);
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
> > +		params->name, params->flags, bitnum);
> > +
> > +out_unlock:
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return bitnum;
> > +
> > +fail_unlock:
> > +	rte_mcfg_tailq_write_unlock();
> > +fail:
> > +	rte_free(mbuf_dynflag);
> > +	rte_free(te);
> > +	return -1;
> > +}
> > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
> > new file mode 100644
> > index 000000000..6e2c81654
> > --- /dev/null
> > +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> > @@ -0,0 +1,163 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + */
> > +
> > +#ifndef _RTE_MBUF_DYN_H_
> > +#define _RTE_MBUF_DYN_H_
> > +
> > +/**
> > + * @file
> > + * RTE Mbuf dynamic fields and flags
> > + *
> > + * Many features require to store data inside the mbuf. As the room in
> > + * mbuf structure is limited, it is not possible to have a field for
> > + * each feature. Also, changing fields in the mbuf structure can break
> > + * the API or ABI.
> > + *
> > + * This module addresses this issue, by enabling the dynamic
> > + * registration of fields or flags:
> > + *
> > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > + *   given size (>= 1 byte) and alignment constraint.
> > + * - a dynamic flag is a named bit in the rte_mbuf structure, stored
> > + *   in mbuf->ol_flags.
> > + *
> > + * The typical use case is when a specific offload feature requires to
> > + * register a dedicated offload field in the mbuf structure, and adding
> > + * a static field or flag is not justified.
> > + *
> > + * Example of use:
> > + *
> > + * - A rte_mbuf_dynfield structure is defined, containing the parameters
> > + *   of the dynamic field to be registered:
> > + *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
> > + * - The application initializes the PMD, and asks for this feature
> > + *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
> > + *   rxconf. This will make the PMD to register the field by calling
> > + *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
> > + *   stores the returned offset.
> > + * - The application that uses the offload feature also registers
> > + *   the field to retrieve the same offset.
> > + * - When the PMD receives a packet, it can set the field:
> > + *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
> > + * - In the main loop, the application can retrieve the value with
> > + *   the same macro.
> > + *
> > + * To avoid wasting space, the dynamic fields or flags must only be
> > + * reserved on demand, when an application asks for the related feature.
> > + *
> > + * The registration can be done at any moment, but it is not possible
> > + * to unregister fields or flags for now.
> > + *
> > + * A dynamic field can be reserved and used by an application only.
> > + * It can for instance be a packet mark.
> > + */
> > +
> > +#include <sys/types.h>
> > +/**
> > + * Maximum length of the dynamic field or flag string.
> > + */
> > +#define RTE_MBUF_DYN_NAMESIZE 64
> > +
> > +/**
> > + * Structure describing the parameters of a mbuf dynamic field.
> > + */
> > +struct rte_mbuf_dynfield {
> > +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
> > +	size_t size;        /**< The number of bytes to reserve. */
> > +	size_t align;       /**< The alignment constraint (power of 2). */
> > +	unsigned int flags; /**< Reserved for future use, must be 0. */
> > +};
> > +
> > +/**
> > + * Structure describing the parameters of a mbuf dynamic flag.
> > + */
> > +struct rte_mbuf_dynflag {
> > +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
> > +	unsigned int flags; /**< Reserved for future use, must be 0. */
> > +};
> > +
> > +/**
> > + * Register space for a dynamic field in the mbuf structure.
> > + *
> > + * If the field is already registered (same name and parameters), its
> > + * offset is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters (name, size,
> > + *   alignment constraint and flags).
> > + * @return
> > + *   The offset in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (size, align, or flags).
> > + *   - EEXIST: this name is already register with different parameters.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: not enough room in mbuf.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name does not ends with \0.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
> > +
> > +/**
> > + * Lookup for a registered dynamic mbuf field.
> > + *
> > + * @param name
> > + *   A string identifying the dynamic field.
> > + * @param params
> > + *   If not NULL, and if the lookup is successful, the structure is
> > + *   filled with the parameters of the dynamic field.
> > + * @return
> > + *   The offset of this field in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - ENOENT: no dynamic field matches this name.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynfield_lookup(const char *name,
> > +			struct rte_mbuf_dynfield *params);
> > +
> > +/**
> > + * Register a dynamic flag in the mbuf structure.
> > + *
> > + * If the flag is already registered (same name and parameters), its
> > + * offset is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters of the dynamic
> > + *   flag (name and options).
> > + * @return
> > + *   The number of the reserved bit, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (size, align, or flags).
> > + *   - EEXIST: this name is already register with different parameters.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: no more flag available.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
> > +
> > +/**
> > + * Lookup for a registered dynamic mbuf flag.
> > + *
> > + * @param name
> > + *   A string identifying the dynamic flag.
> > + * @param params
> > + *   If not NULL, and if the lookup is successful, the structure is
> > + *   filled with the parameters of the dynamic flag.
> > + * @return
> > + *   The offset of this flag in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - ENOENT: no dynamic flag matches this name.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynflag_lookup(const char *name,
> > +			struct rte_mbuf_dynflag *params);
> > +
> > +/**
> > + * Helper macro to access to a dynamic field.
> > + */
> > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> > +
> > +#endif
> > diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
> > index 2662a37bf..a98310570 100644
> > --- a/lib/librte_mbuf/rte_mbuf_version.map
> > +++ b/lib/librte_mbuf/rte_mbuf_version.map
> > @@ -50,4 +50,8 @@ EXPERIMENTAL {
> >  	global:
> > 
> >  	rte_mbuf_check;
> > +	rte_mbuf_dynfield_lookup;
> > +	rte_mbuf_dynfield_register;
> > +	rte_mbuf_dynflag_lookup;
> > +	rte_mbuf_dynflag_register;
> >  } DPDK_18.08;
> > --
> > 2.20.1
> 

I will send a v2 shortly, thanks

Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-10-17  7:54     ` Olivier Matz
@ 2019-10-17 11:58       ` Ananyev, Konstantin
  2019-10-17 12:58         ` Olivier Matz
  0 siblings, 1 reply; 64+ messages in thread
From: Ananyev, Konstantin @ 2019-10-17 11:58 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Wiles, Keith, Jerin Jacob Kollanukkaran


Hi Olivier,

> > > Many features require to store data inside the mbuf. As the room in mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> >
> > Looks ok to me in general.
> > Some comments/suggestions inline.
> > Konstantin
> >
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > >
> > > rfc -> v1
> > >
> > > * Rebase on top of master
> > > * Change registration API to use a structure instead of
> > >   variables, getting rid of #defines (Stephen's comment)
> > > * Update flag registration to use a similar API as fields.
> > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > * Add a debug log at registration
> > > * Add some words in release note
> > > * Did some performance tests (sugg. by Andrew):
> > >   On my platform, reading a dynamic field takes ~3 cycles more
> > >   than a static field, and ~2 cycles more for writing.
> > >
> > >  app/test/test_mbuf.c                   | 114 ++++++-
> > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > >  lib/librte_mbuf/Makefile               |   2 +
> > >  lib/librte_mbuf/meson.build            |   6 +-
> > >  lib/librte_mbuf/rte_mbuf.h             |  25 +-
> > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
> > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
> > >  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
> > >  8 files changed, 724 insertions(+), 5 deletions(-)
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > >
> > > --- a/lib/librte_mbuf/rte_mbuf.h
> > > +++ b/lib/librte_mbuf/rte_mbuf.h
> > > @@ -198,9 +198,12 @@ extern "C" {
> > >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> > >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > >
> > > -/* add new RX flags here */
> > > +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > >
> > > -/* add new TX flags here */
> > > +#define PKT_FIRST_FREE (1ULL << 23)
> > > +#define PKT_LAST_FREE (1ULL << 39)
> > > +
> > > +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > >
> > >  /**
> > >   * Indicate that the metadata field in the mbuf is in use.
> > > @@ -738,6 +741,8 @@ struct rte_mbuf {
> > >  	 */
> > >  	struct rte_mbuf_ext_shared_info *shinfo;
> > >
> > > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> >
> > Wonder why just not one field:
> > 	union {
> > 		uint8_t u8[16];
> > 		...
> > 		uint64_t u64[2];
> > 	} dyn_field1;
> > ?
> > Probably would be a bit handy, to refer, register, etc. no?
> 
> I didn't find any place where we need an access through u8, so I
> just changed it into uint64_t dynfield1[2].

My thought was - if you have all dynamic stuff as one field (uint64_t dyn_field[2]),
then you wouldn't need any cycles at register() at all.
But up to you.

> 
> 
> >
> > >  } __rte_cache_aligned;
> > >
> > >  /**
> > > @@ -1684,6 +1689,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
> > >   */
> > >  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> > >
> > > +/**
> > > + * Copy dynamic fields from m_src to m_dst.
> > > + *
> > > + * @param m_dst
> > > + *   The destination mbuf.
> > > + * @param m_src
> > > + *   The source mbuf.
> > > + */
> > > +static inline void
> > > +rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
> > > +{
> > > +	m_dst->dynfield1 = m_src->dynfield1;
> > > +	m_dst->dynfield2 = m_src->dynfield2;
> > > +}
> > > +
> > >  /**
> > >   * Attach packet mbuf to another packet mbuf.
> > >   *
> > > @@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
> > >  	mi->vlan_tci_outer = m->vlan_tci_outer;
> > >  	mi->tx_offload = m->tx_offload;
> > >  	mi->hash = m->hash;
> > > +	rte_mbuf_dynfield_copy(mi, m);
> > >
> > >  	mi->next = NULL;
> > >  	mi->pkt_len = mi->data_len;
> > > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> > > new file mode 100644
> > > index 000000000..13b8742d0
> > > --- /dev/null
> > > +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> > > @@ -0,0 +1,408 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright 2019 6WIND S.A.
> > > + */
> > > +
> > > +#include <sys/queue.h>
> > > +
> > > +#include <rte_common.h>
> > > +#include <rte_eal.h>
> > > +#include <rte_eal_memconfig.h>
> > > +#include <rte_tailq.h>
> > > +#include <rte_errno.h>
> > > +#include <rte_malloc.h>
> > > +#include <rte_string_fns.h>
> > > +#include <rte_mbuf.h>
> > > +#include <rte_mbuf_dyn.h>
> > > +
> > > +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> > > +
> > > +struct mbuf_dynfield_elt {
> > > +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> > > +	struct rte_mbuf_dynfield params;
> > > +	int offset;
> >
> > Why not 'size_t offset', to avoid any explicit conversions, etc?
> 
> Fixed
> 
> 
> > > +};
> > > +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> > > +
> > > +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> > > +	.name = "RTE_MBUF_DYNFIELD",
> > > +};
> > > +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> > > +
> > > +struct mbuf_dynflag_elt {
> > > +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> > > +	struct rte_mbuf_dynflag params;
> > > +	int bitnum;
> > > +};
> > > +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> > > +
> > > +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> > > +	.name = "RTE_MBUF_DYNFLAG",
> > > +};
> > > +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> > > +
> > > +struct mbuf_dyn_shm {
> > > +	/** For each mbuf byte, free_space[i] == 1 if space is free. */
> > > +	uint8_t free_space[sizeof(struct rte_mbuf)];
> > > +	/** Bitfield of available flags. */
> > > +	uint64_t free_flags;
> > > +};
> > > +static struct mbuf_dyn_shm *shm;
> > > +
> > > +/* allocate and initialize the shared memory */
> > > +static int
> > > +init_shared_mem(void)
> > > +{
> > > +	const struct rte_memzone *mz;
> > > +	uint64_t mask;
> > > +
> > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > > +						sizeof(struct mbuf_dyn_shm),
> > > +						SOCKET_ID_ANY, 0,
> > > +						RTE_CACHE_LINE_SIZE);
> > > +	} else {
> > > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > > +	}
> > > +	if (mz == NULL)
> > > +		return -1;
> > > +
> > > +	shm = mz->addr;
> > > +
> > > +#define mark_free(field)						\
> > > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > > +		0xff, sizeof(((struct rte_mbuf *)0)->field))
> >
> > I think you can avoid defining/unedifying macros here by something like that:
> >
> > static const struct {
> >       size_t offset;
> >       size_t size;
> > } dyn_syms[] = {
> >     [0] = {.offset = offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
> >     [1] = {.offset = offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
> > };
> > ...
> >
> > for (i = 0; i != RTE_DIM(dyn_syms); i++)
> >     memset(shm->free_space + dym_syms[i].offset, UINT8_MAX, dym_syms[i].size);
> >
> 
> I tried it, but the following lines are too long
>      [0] = {offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
>      [1] = {offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
> To make them shorter, we can use a macro... but... wait :)

Guess what, you can put offset and size on different lines :)
[0] = {
	.offset = offsetof(struct rte_mbuf, dynfield1),
	.size = sizeof(((struct rte_mbuf *)0)->dynfield1),
},
....

> 
> > > +
> > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > +		/* init free_space, keep it sync'd with
> > > +		 * rte_mbuf_dynfield_copy().
> > > +		 */
> > > +		memset(shm, 0, sizeof(*shm));
> > > +		mark_free(dynfield1);
> > > +		mark_free(dynfield2);
> > > +
> > > +		/* init free_flags */
> > > +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> > > +			shm->free_flags |= mask;
> > > +	}
> > > +#undef mark_free
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/* check if this offset can be used */
> > > +static int
> > > +check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
> > > +{
> > > +	size_t i;
> > > +
> > > +	(void)flags;
> >
> >
> > We have RTE_SET_USED() for such cases...
> > Though as it is an internal function probably better not to introduce
> > unused parameters at all.
> 
> I removed the flag parameter as you suggested.
> 
> 
> > > +
> > > +	if ((offset & (align - 1)) != 0)
> > > +		return -1;
> > > +	if (offset + size > sizeof(struct rte_mbuf))
> > > +		return -1;
> > > +
> > > +	for (i = 0; i < size; i++) {
> > > +		if (!shm->free_space[i + offset])
> > > +			return -1;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/* assume tailq is locked */
> > > +static struct mbuf_dynfield_elt *
> > > +__mbuf_dynfield_lookup(const char *name)
> > > +{
> > > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > > +	struct rte_tailq_entry *te;
> > > +
> > > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > > +
> > > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > > +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> > > +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> > > +			break;
> > > +	}
> > > +
> > > +	if (te == NULL) {
> > > +		rte_errno = ENOENT;
> > > +		return NULL;
> > > +	}
> > > +
> > > +	return mbuf_dynfield;
> > > +}
> > > +
> > > +int
> > > +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> > > +{
> > > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > > +
> > > +	if (shm == NULL) {
> > > +		rte_errno = ENOENT;
> > > +		return -1;
> > > +	}
> > > +
> > > +	rte_mcfg_tailq_read_lock();
> > > +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> > > +	rte_mcfg_tailq_read_unlock();
> > > +
> > > +	if (mbuf_dynfield == NULL) {
> > > +		rte_errno = ENOENT;
> > > +		return -1;
> > > +	}
> > > +
> > > +	if (params != NULL)
> > > +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> > > +
> > > +	return mbuf_dynfield->offset;
> > > +}
> > > +
> > > +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> > > +		const struct rte_mbuf_dynfield *params2)
> > > +{
> > > +	if (strcmp(params1->name, params2->name))
> > > +		return -1;
> > > +	if (params1->size != params2->size)
> > > +		return -1;
> > > +	if (params1->align != params2->align)
> > > +		return -1;
> > > +	if (params1->flags != params2->flags)
> > > +		return -1;
> > > +	return 0;
> > > +}
> > > +
> > > +int
> > > +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
> >
> > What I meant at user-space - if we can also have another function that would allow
> > user to specify required offset for dynfield explicitly, then user can define it as constant
> > value and let compiler do optimization work and hopefully generate faster code to access
> > this field.
> > Something like that:
> >
> > int rte_mbuf_dynfiled_register_offset(const struct rte_mbuf_dynfield *params, size_t offset);
> >
> > #define RTE_MBUF_DYNFIELD_OFFSET(fld, off)  (offsetof(struct rte_mbuf, fld) + (off))
> >
> > And then somewhere in user code:
> >
> > /* to let say reserve first 4B in dynfield1*/
> > #define MBUF_DYNFIELD_A	RTE_MBUF_DYNFIELD_OFFSET(dynfiled1, 0)
> > ...
> > params.name = RTE_STR(MBUF_DYNFIELD_A);
> > params.size = sizeof(uint32_t);
> > params.align = sizeof(uint32_t);
> > ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_A);
> > if (ret != MBUF_DYNFIELD_A)  {
> >      /* handle it somehow, probably just terminate gracefully... */
> > }
> > ...
> >
> > /* to let say reserve last 2B in dynfield2*/
> > #define MBUF_DYNFIELD_B	RTE_MBUF_DYNFIELD_OFFSET(dynfiled2, 6)
> > ...
> > params.name = RTE_STR(MBUF_DYNFIELD_B);
> > params.size = sizeof(uint16_t);
> > params.align = sizeof(uint16_t);
> > ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_B);
> >
> > After that user can use constant offsets MBUF_DYNFIELD_A/ MBUF_DYNFIELD_B
> > to access these fields.
> > Same thoughts for DYNFLAG.
> 
> I added the feature in v2.
> 
> 
> > > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > > +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> > > +	struct rte_tailq_entry *te = NULL;
> > > +	int offset, ret;
> >
> > size_t offset
> > to avoid explicit conversions, etc.?
> >
> 
> Fixed.
> 
> 
> > > +	size_t i;
> > > +
> > > +	if (shm == NULL && init_shared_mem() < 0)
> > > +		goto fail;
> >
> > As I understand, here you allocate/initialize your shm without any lock protection,
> > though later you protect it via  rte_mcfg_tailq_write_lock().
> > That seems a bit flakey to me.
> > Why not to store information about free dynfield bytes inside mbuf_dynfield_tailq?
> > Let say  at init() create and add an entry into that list with some reserved name.
> > Then at register - grab mcfg_tailq_write_lock and do lookup
> > for such entry and then read/update it as needed.
> > It would help to avoid racing problem, plus you wouldn't need to
> > allocate/lookup for memzone.
> 
> I don't quite like the idea of having a special entry with a different type
> in an element list. Despite it is simpler for a locking perspective, it is
> less obvious for the developper.
> 
> Also, I changed the way a zone is reserved to return the one that have the
> less impact on next reservation, and I feel it is easier to implement with
> the shared memory.
> 
> So, I just moved the init_shared_mem() inside the rte_mcfg_tailq_write_lock(),
> it should do the job.

Yep, that should work too, I think.

> 
> 
> > > +	if (params->size >= sizeof(struct rte_mbuf)) {
> > > +		rte_errno = EINVAL;
> > > +		goto fail;
> > > +	}
> > > +	if (!rte_is_power_of_2(params->align)) {
> > > +		rte_errno = EINVAL;
> > > +		goto fail;
> > > +	}
> > > +	if (params->flags != 0) {
> > > +		rte_errno = EINVAL;
> > > +		goto fail;
> > > +	}
> > > +
> > > +	rte_mcfg_tailq_write_lock();
> > > +
> >
> > I think it probably would be cleaner and easier to read/maintain, if you'll put actual
> > code under lock protection into a separate function - as you did for __mbuf_dynfield_lookup().
> 
> Yes, I did that, it should be clearer now.
> 
> 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH] mbuf: support dynamic fields and flags
  2019-10-17 11:58       ` Ananyev, Konstantin
@ 2019-10-17 12:58         ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-10-17 12:58 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, Thomas Monjalon, Wang, Haiyue, Stephen Hemminger,
	Andrew Rybchenko, Wiles, Keith, Jerin Jacob Kollanukkaran

Hi Konstantin,

On Thu, Oct 17, 2019 at 11:58:52AM +0000, Ananyev, Konstantin wrote:
> 
> Hi Olivier,
> 
> > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > structure is limited, it is not possible to have a field for each
> > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > or ABI.
> > > >
> > > > This commit addresses these issues, by enabling the dynamic registration
> > > > of fields or flags:
> > > >
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > >   given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > >
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if it
> > > > is going to be used (i.e when the application explicitly asks for it).
> > > >
> > > > The registration can be done at any moment, but it is not possible
> > > > to unregister fields or flags for now.
> > >
> > > Looks ok to me in general.
> > > Some comments/suggestions inline.
> > > Konstantin
> > >
> > > >
> > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > > ---
> > > >
> > > > rfc -> v1
> > > >
> > > > * Rebase on top of master
> > > > * Change registration API to use a structure instead of
> > > >   variables, getting rid of #defines (Stephen's comment)
> > > > * Update flag registration to use a similar API as fields.
> > > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > > * Add a debug log at registration
> > > > * Add some words in release note
> > > > * Did some performance tests (sugg. by Andrew):
> > > >   On my platform, reading a dynamic field takes ~3 cycles more
> > > >   than a static field, and ~2 cycles more for writing.
> > > >
> > > >  app/test/test_mbuf.c                   | 114 ++++++-
> > > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > > >  lib/librte_mbuf/Makefile               |   2 +
> > > >  lib/librte_mbuf/meson.build            |   6 +-
> > > >  lib/librte_mbuf/rte_mbuf.h             |  25 +-
> > > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 408 +++++++++++++++++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 163 ++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_version.map   |   4 +
> > > >  8 files changed, 724 insertions(+), 5 deletions(-)
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > > >
> > > > --- a/lib/librte_mbuf/rte_mbuf.h
> > > > +++ b/lib/librte_mbuf/rte_mbuf.h
> > > > @@ -198,9 +198,12 @@ extern "C" {
> > > >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> > > >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > > >
> > > > -/* add new RX flags here */
> > > > +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > > >
> > > > -/* add new TX flags here */
> > > > +#define PKT_FIRST_FREE (1ULL << 23)
> > > > +#define PKT_LAST_FREE (1ULL << 39)
> > > > +
> > > > +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > > >
> > > >  /**
> > > >   * Indicate that the metadata field in the mbuf is in use.
> > > > @@ -738,6 +741,8 @@ struct rte_mbuf {
> > > >  	 */
> > > >  	struct rte_mbuf_ext_shared_info *shinfo;
> > > >
> > > > +	uint64_t dynfield1; /**< Reserved for dynamic fields. */
> > > > +	uint64_t dynfield2; /**< Reserved for dynamic fields. */
> > >
> > > Wonder why just not one field:
> > > 	union {
> > > 		uint8_t u8[16];
> > > 		...
> > > 		uint64_t u64[2];
> > > 	} dyn_field1;
> > > ?
> > > Probably would be a bit handy, to refer, register, etc. no?
> > 
> > I didn't find any place where we need an access through u8, so I
> > just changed it into uint64_t dynfield1[2].
> 
> My thought was - if you'll have all dynamic stuff as one field (uint64_t dyn_field[2]),
> then you wouldn't need any cycles at register() at all.
> But up to you.

I changed it.


> > 
> > >
> > > >  } __rte_cache_aligned;
> > > >
> > > >  /**
> > > > @@ -1684,6 +1689,21 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
> > > >   */
> > > >  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> > > >
> > > > +/**
> > > > + * Copy dynamic fields from m_src to m_dst.
> > > > + *
> > > > + * @param m_dst
> > > > + *   The destination mbuf.
> > > > + * @param m_src
> > > > + *   The source mbuf.
> > > > + */
> > > > +static inline void
> > > > +rte_mbuf_dynfield_copy(struct rte_mbuf *m_dst, const struct rte_mbuf *m_src)
> > > > +{
> > > > +	m_dst->dynfield1 = m_src->dynfield1;
> > > > +	m_dst->dynfield2 = m_src->dynfield2;
> > > > +}
> > > > +
> > > >  /**
> > > >   * Attach packet mbuf to another packet mbuf.
> > > >   *
> > > > @@ -1732,6 +1752,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
> > > >  	mi->vlan_tci_outer = m->vlan_tci_outer;
> > > >  	mi->tx_offload = m->tx_offload;
> > > >  	mi->hash = m->hash;
> > > > +	rte_mbuf_dynfield_copy(mi, m);
> > > >
> > > >  	mi->next = NULL;
> > > >  	mi->pkt_len = mi->data_len;
> > > > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> > > > new file mode 100644
> > > > index 000000000..13b8742d0
> > > > --- /dev/null
> > > > +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> > > > @@ -0,0 +1,408 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright 2019 6WIND S.A.
> > > > + */
> > > > +
> > > > +#include <sys/queue.h>
> > > > +
> > > > +#include <rte_common.h>
> > > > +#include <rte_eal.h>
> > > > +#include <rte_eal_memconfig.h>
> > > > +#include <rte_tailq.h>
> > > > +#include <rte_errno.h>
> > > > +#include <rte_malloc.h>
> > > > +#include <rte_string_fns.h>
> > > > +#include <rte_mbuf.h>
> > > > +#include <rte_mbuf_dyn.h>
> > > > +
> > > > +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> > > > +
> > > > +struct mbuf_dynfield_elt {
> > > > +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> > > > +	struct rte_mbuf_dynfield params;
> > > > +	int offset;
> > >
> > > Why not 'size_t offset', to avoid any explicit conversions, etc?
> > 
> > Fixed
> > 
> > 
> > > > +};
> > > > +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> > > > +
> > > > +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> > > > +	.name = "RTE_MBUF_DYNFIELD",
> > > > +};
> > > > +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> > > > +
> > > > +struct mbuf_dynflag_elt {
> > > > +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> > > > +	struct rte_mbuf_dynflag params;
> > > > +	int bitnum;
> > > > +};
> > > > +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> > > > +
> > > > +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> > > > +	.name = "RTE_MBUF_DYNFLAG",
> > > > +};
> > > > +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> > > > +
> > > > +struct mbuf_dyn_shm {
> > > > +	/** For each mbuf byte, free_space[i] == 1 if space is free. */
> > > > +	uint8_t free_space[sizeof(struct rte_mbuf)];
> > > > +	/** Bitfield of available flags. */
> > > > +	uint64_t free_flags;
> > > > +};
> > > > +static struct mbuf_dyn_shm *shm;
> > > > +
> > > > +/* allocate and initialize the shared memory */
> > > > +static int
> > > > +init_shared_mem(void)
> > > > +{
> > > > +	const struct rte_memzone *mz;
> > > > +	uint64_t mask;
> > > > +
> > > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > > > +						sizeof(struct mbuf_dyn_shm),
> > > > +						SOCKET_ID_ANY, 0,
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +	} else {
> > > > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > > > +	}
> > > > +	if (mz == NULL)
> > > > +		return -1;
> > > > +
> > > > +	shm = mz->addr;
> > > > +
> > > > +#define mark_free(field)						\
> > > > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > > > +		0xff, sizeof(((struct rte_mbuf *)0)->field))
> > >
> > > > I think you can avoid defining/undefining macros here with something like this:
> > >
> > > static const struct {
> > >       size_t offset;
> > >       size_t size;
> > > } dyn_syms[] = {
> > >     [0] = {.offset = offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
> > >     [1] = {.offset = offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
> > > };
> > > ...
> > >
> > > for (i = 0; i != RTE_DIM(dyn_syms); i++)
> > >     memset(shm->free_space + dym_syms[i].offset, UINT8_MAX, dym_syms[i].size);
> > >
> > 
> > I tried it, but the following lines are too long
> >      [0] = {offsetof(struct rte_mbuf, dynfield1), sizeof((struct rte_mbuf *)0)->dynfield1),
> >      [1] = {offsetof(struct rte_mbuf, dynfield2), sizeof((struct rte_mbuf *)0)->dynfield2),
> > To make them shorter, we can use a macro... but... wait :)
> 
> Guess what, you can put offset and size on different lines :)
> [0] = {
> 	.offset = offsetof(struct rte_mbuf, dynfield1),
> 	.size= sizeof((struct rte_mbuf *)0)->dynfield1),
> },

Yes, but honestly, I'm not sure that it will be more readable than
the macro, knowing that we could add fields in the future.


> ....
> 
> > 
> > > > +
> > > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > > +		/* init free_space, keep it sync'd with
> > > > +		 * rte_mbuf_dynfield_copy().
> > > > +		 */
> > > > +		memset(shm, 0, sizeof(*shm));
> > > > +		mark_free(dynfield1);
> > > > +		mark_free(dynfield2);
> > > > +
> > > > +		/* init free_flags */
> > > > +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> > > > +			shm->free_flags |= mask;
> > > > +	}
> > > > +#undef mark_free
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/* check if this offset can be used */
> > > > +static int
> > > > +check_offset(size_t offset, size_t size, size_t align, unsigned int flags)
> > > > +{
> > > > +	size_t i;
> > > > +
> > > > +	(void)flags;
> > >
> > >
> > > We have RTE_SET_USED() for such cases...
> > > Though as it is an internal function probably better not to introduce
> > > unused parameters at all.
> > 
> > I removed the flag parameter as you suggested.
> > 
> > 
> > > > +
> > > > +	if ((offset & (align - 1)) != 0)
> > > > +		return -1;
> > > > +	if (offset + size > sizeof(struct rte_mbuf))
> > > > +		return -1;
> > > > +
> > > > +	for (i = 0; i < size; i++) {
> > > > +		if (!shm->free_space[i + offset])
> > > > +			return -1;
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/* assume tailq is locked */
> > > > +static struct mbuf_dynfield_elt *
> > > > +__mbuf_dynfield_lookup(const char *name)
> > > > +{
> > > > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > > > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > > > +	struct rte_tailq_entry *te;
> > > > +
> > > > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > > > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > > > +
> > > > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > > > +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> > > > +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> > > > +			break;
> > > > +	}
> > > > +
> > > > +	if (te == NULL) {
> > > > +		rte_errno = ENOENT;
> > > > +		return NULL;
> > > > +	}
> > > > +
> > > > +	return mbuf_dynfield;
> > > > +}
> > > > +
> > > > +int
> > > > +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> > > > +{
> > > > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > > > +
> > > > +	if (shm == NULL) {
> > > > +		rte_errno = ENOENT;
> > > > +		return -1;
> > > > +	}
> > > > +
> > > > +	rte_mcfg_tailq_read_lock();
> > > > +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> > > > +	rte_mcfg_tailq_read_unlock();
> > > > +
> > > > +	if (mbuf_dynfield == NULL) {
> > > > +		rte_errno = ENOENT;
> > > > +		return -1;
> > > > +	}
> > > > +
> > > > +	if (params != NULL)
> > > > +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> > > > +
> > > > +	return mbuf_dynfield->offset;
> > > > +}
> > > > +
> > > > +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> > > > +		const struct rte_mbuf_dynfield *params2)
> > > > +{
> > > > +	if (strcmp(params1->name, params2->name))
> > > > +		return -1;
> > > > +	if (params1->size != params2->size)
> > > > +		return -1;
> > > > +	if (params1->align != params2->align)
> > > > +		return -1;
> > > > +	if (params1->flags != params2->flags)
> > > > +		return -1;
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +int
> > > > +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
> > >
> > > What I meant at user-space - if we can also have another function that would allow
> > > user to specify required offset for dynfield explicitly, then user can define it as constant
> > > value and let compiler do optimization work and hopefully generate faster code to access
> > > this field.
> > > Something like that:
> > >
> > > int rte_mbuf_dynfiled_register_offset(const struct rte_mbuf_dynfield *params, size_t offset);
> > >
> > > #define RTE_MBUF_DYNFIELD_OFFSET(fld, off)  (offsetof(struct rte_mbuf, fld) + (off))
> > >
> > > And then somewhere in user code:
> > >
> > > /* to let say reserve first 4B in dynfield1*/
> > > #define MBUF_DYNFIELD_A	RTE_MBUF_DYNFIELD_OFFSET(dynfiled1, 0)
> > > ...
> > > params.name = RTE_STR(MBUF_DYNFIELD_A);
> > > params.size = sizeof(uint32_t);
> > > params.align = sizeof(uint32_t);
> > > ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_A);
> > > if (ret != MBUF_DYNFIELD_A)  {
> > >      /* handle it somehow, probably just terminate gracefully... */
> > > }
> > > ...
> > >
> > > /* to let say reserve last 2B in dynfield2*/
> > > #define MBUF_DYNFIELD_B	RTE_MBUF_DYNFIELD_OFFSET(dynfiled2, 6)
> > > ...
> > > params.name = RTE_STR(MBUF_DYNFIELD_B);
> > > params.size = sizeof(uint16_t);
> > > params.align = sizeof(uint16_t);
> > > ret = rte_mbuf_dynfiled_register_offset(&params, MBUF_DYNFIELD_B);
> > >
> > > After that user can use constant offsets MBUF_DYNFIELD_A/ MBUF_DYNFIELD_B
> > > to access these fields.
> > > Same thoughts for DYNFLAG.
> > 
> > I added the feature in v2.
> > 
> > 
> > > > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > > > +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> > > > +	struct rte_tailq_entry *te = NULL;
> > > > +	int offset, ret;
> > >
> > > size_t offset
> > > to avoid explicit conversions, etc.?
> > >
> > 
> > Fixed.
> > 
> > 
> > > > +	size_t i;
> > > > +
> > > > +	if (shm == NULL && init_shared_mem() < 0)
> > > > +		goto fail;
> > >
> > > As I understand, here you allocate/initialize your shm without any lock protection,
> > > though later you protect it via  rte_mcfg_tailq_write_lock().
> > > That seems a bit flakey to me.
> > > Why not to store information about free dynfield bytes inside mbuf_dynfield_tailq?
> > > Let say  at init() create and add an entry into that list with some reserved name.
> > > Then at register - grab mcfg_tailq_write_lock and do lookup
> > > for such entry and then read/update it as needed.
> > > It would help to avoid racing problem, plus you wouldn't need to
> > > allocate/lookup for memzone.
> > 
> > I don't quite like the idea of having a special entry with a different type
> > in an element list. Although it is simpler from a locking perspective, it is
> > less obvious for the developer.
> > 
> > Also, I changed the way a zone is reserved to return the one that has the
> > least impact on the next reservation, and I feel it is easier to implement
> > with the shared memory.
> > 
> > So, I just moved the init_shared_mem() inside the rte_mcfg_tailq_write_lock(),
> > it should do the job.
> 
> Yep, that should work too, I think.
> 
> > 
> > 
> > > > +	if (params->size >= sizeof(struct rte_mbuf)) {
> > > > +		rte_errno = EINVAL;
> > > > +		goto fail;
> > > > +	}
> > > > +	if (!rte_is_power_of_2(params->align)) {
> > > > +		rte_errno = EINVAL;
> > > > +		goto fail;
> > > > +	}
> > > > +	if (params->flags != 0) {
> > > > +		rte_errno = EINVAL;
> > > > +		goto fail;
> > > > +	}
> > > > +
> > > > +	rte_mcfg_tailq_write_lock();
> > > > +
> > >
> > > I think it probably would be cleaner and easier to read/maintain, if you'll put actual
> > > code under lock protection into a separate function - as you did for __mbuf_dynfield_lookup().
> > 
> > Yes, I did that, it should be clearer now.
> > 
> > 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
                   ` (4 preceding siblings ...)
  2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
@ 2019-10-17 14:42 ` Olivier Matz
  2019-10-18  2:47   ` Wang, Haiyue
                     ` (3 more replies)
  2019-10-24  8:13 ` [dpdk-dev] [PATCH v3] " Olivier Matz
  2019-10-26 12:39 ` [dpdk-dev] [PATCH v4] " Olivier Matz
  7 siblings, 4 replies; 64+ messages in thread
From: Olivier Matz @ 2019-10-17 14:42 UTC (permalink / raw)
  To: dev
  Cc: Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Many features require storing data inside the mbuf. As the room in mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
  given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e. when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags for now.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
---

v2

* Rebase on top of master: solve conflict with Stephen's patchset
  (packet copy)
* Add new apis to register a dynamic field/flag at a specific place
* Add a dump function (sugg by David)
* Enhance field registration function to select the best offset, keeping
  large aligned zones as much as possible (sugg by Konstantin)
* Use a size_t and unsigned int instead of int when relevant
  (sugg by Konstantin)
* Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
  (sugg by Konstantin)
* Remove unused argument in private function (sugg by Konstantin)
* Fix and simplify locking (sugg by Konstantin)
* Fix minor typo

rfc -> v1

* Rebase on top of master
* Change registration API to use a structure instead of
  variables, getting rid of #defines (Stephen's comment)
* Update flag registration to use a similar API as fields.
* Change max name length from 32 to 64 (sugg. by Thomas)
* Enhance API documentation (Haiyue's and Andrew's comments)
* Add a debug log at registration
* Add some words in release note
* Did some performance tests (sugg. by Andrew):
  On my platform, reading a dynamic field takes ~3 cycles more
  than a static field, and ~2 cycles more for writing.

 app/test/test_mbuf.c                   | 145 ++++++-
 doc/guides/rel_notes/release_19_11.rst |   7 +
 lib/librte_mbuf/Makefile               |   2 +
 lib/librte_mbuf/meson.build            |   6 +-
 lib/librte_mbuf/rte_mbuf.h             |  23 +-
 lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
 lib/librte_mbuf/rte_mbuf_version.map   |   7 +
 8 files changed, 959 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index b9c2b2500..01cafad59 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -28,6 +28,7 @@
 #include <rte_random.h>
 #include <rte_cycles.h>
 #include <rte_malloc.h>
+#include <rte_mbuf_dyn.h>
 
 #include "test.h"
 
@@ -657,7 +658,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
 		rte_pktmbuf_free(clone2);
 	return -1;
 }
-#undef GOTO_FAIL
 
 /*
  * test allocation and free of mbufs
@@ -1276,6 +1276,143 @@ test_tx_offload(void)
 	return (v1 == v2) ? 0 : -EINVAL;
 }
 
+static int
+test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
+{
+	const struct rte_mbuf_dynfield dynfield = {
+		.name = "test-dynfield",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield2 = {
+		.name = "test-dynfield2",
+		.size = sizeof(uint16_t),
+		.align = __alignof__(uint16_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield3 = {
+		.name = "test-dynfield3",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_big = {
+		.name = "test-dynfield-fail-big",
+		.size = 256,
+		.align = 1,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_align = {
+		.name = "test-dynfield-fail-align",
+		.size = 1,
+		.align = 3,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag = {
+		.name = "test-dynflag",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag2 = {
+		.name = "test-dynflag2",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag3 = {
+		.name = "test-dynflag3",
+		.flags = 0,
+	};
+	struct rte_mbuf *m = NULL;
+	int offset, offset2, offset3;
+	int flag, flag2, flag3;
+	int ret;
+
+	printf("Test mbuf dynamic fields and flags\n");
+	rte_mbuf_dyn_dump(stdout);
+
+	offset = rte_mbuf_dynfield_register(&dynfield);
+	if (offset == -1)
+		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
+			offset, strerror(errno));
+
+	ret = rte_mbuf_dynfield_register(&dynfield);
+	if (ret != offset)
+		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
+			ret, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register(&dynfield2);
+	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
+		GOTO_FAIL("failed to register dynamic field 2, offset2=%d: %s",
+			offset2, strerror(errno));
+
+	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
+				offsetof(struct rte_mbuf, dynfield1[1]));
+	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
+		GOTO_FAIL("failed to register dynamic field 3, offset=%d: %s",
+			offset3, strerror(errno));
+
+	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
+		offset, offset2, offset3);
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (too big)");
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
+
+	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
+				offsetof(struct rte_mbuf, ol_flags));
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (not avail)");
+
+	flag = rte_mbuf_dynflag_register(&dynflag);
+	if (flag == -1)
+		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
+			flag, strerror(errno));
+
+	ret = rte_mbuf_dynflag_register(&dynflag);
+	if (ret != flag)
+		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
+			ret, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register(&dynflag2);
+	if (flag2 == -1 || flag2 == flag)
+		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
+			flag2, strerror(errno));
+
+	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
+						rte_bsf64(PKT_LAST_FREE));
+	if (flag3 != rte_bsf64(PKT_LAST_FREE))
+		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
+			flag3, strerror(errno));
+
+	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
+
+	/* set, get dynamic field */
+	m = rte_pktmbuf_alloc(pktmbuf_pool);
+	if (m == NULL)
+		GOTO_FAIL("Cannot allocate mbuf");
+
+	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
+	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
+		GOTO_FAIL("failed to read dynamic field");
+	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
+	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
+		GOTO_FAIL("failed to read dynamic field");
+
+	/* set a dynamic flag */
+	m->ol_flags |= (1ULL << flag);
+
+	rte_mbuf_dyn_dump(stdout);
+	rte_pktmbuf_free(m);
+	return 0;
+fail:
+	rte_pktmbuf_free(m);
+	return -1;
+}
+#undef GOTO_FAIL
+
 static int
 test_mbuf(void)
 {
@@ -1295,6 +1432,12 @@ test_mbuf(void)
 		goto err;
 	}
 
+	/* test registration of dynamic fields and flags */
+	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
+		printf("mbuf dynflag test failed\n");
+		goto err;
+	}
+
 	/* create a specific pktmbuf pool with a priv_size != 0 and no data
 	 * room size */
 	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 85953b962..9e9c94554 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -21,6 +21,13 @@ DPDK Release 19.11
 
       xdg-open build/doc/html/guides/rel_notes/release_19_11.html
 
+* **Add support for dynamic fields and flags in mbuf.**
+
+  This new feature adds the ability to dynamically register some room
+  for a field or a flag in the mbuf structure. This is typically used
+  for specific offload features, where adding a static field or flag
+  in the mbuf is not justified.
+
 
 New Features
 ------------
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index c8f6d2689..5a9bcee73 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -17,8 +17,10 @@ LIBABIVER := 5
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 6cc11ebb4..9137e8f26 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,8 +2,10 @@
 # Copyright(c) 2017 Intel Corporation
 
 version = 5
-sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
-headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
+sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
+	'rte_mbuf_dyn.c')
+headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
+	'rte_mbuf_dyn.h')
 deps += ['mempool']
 
 allow_experimental_apis = true
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index fb0849ac1..5740b1e93 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -198,9 +198,12 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
 
-/* add new RX flags here */
+/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-/* add new TX flags here */
+#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_LAST_FREE (1ULL << 39)
+
+/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
 
 /**
  * Indicate that the metadata field in the mbuf is in use.
@@ -738,6 +741,7 @@ struct rte_mbuf {
 	 */
 	struct rte_mbuf_ext_shared_info *shinfo;
 
+	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
 } __rte_cache_aligned;
 
 /**
@@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
  */
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
+/**
+ * Copy dynamic fields from msrc to mdst.
+ *
+ * @param mdst
+ *   The destination mbuf.
+ * @param msrc
+ *   The source mbuf.
+ */
+static inline void
+rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
+{
+	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
+}
+
 /* internal */
 static inline void
 __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
@@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
 	mdst->hash = msrc->hash;
 	mdst->packet_type = msrc->packet_type;
 	mdst->timestamp = msrc->timestamp;
+	rte_mbuf_dynfield_copy(mdst, msrc);
 }
 
 /**
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
new file mode 100644
index 000000000..9ef235483
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.c
@@ -0,0 +1,548 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
+
+#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
+
+struct mbuf_dynfield_elt {
+	TAILQ_ENTRY(mbuf_dynfield_elt) next;
+	struct rte_mbuf_dynfield params;
+	size_t offset;
+};
+TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynfield_tailq = {
+	.name = "RTE_MBUF_DYNFIELD",
+};
+EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
+
+struct mbuf_dynflag_elt {
+	TAILQ_ENTRY(mbuf_dynflag_elt) next;
+	struct rte_mbuf_dynflag params;
+	unsigned int bitnum;
+};
+TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynflag_tailq = {
+	.name = "RTE_MBUF_DYNFLAG",
+};
+EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
+
+struct mbuf_dyn_shm {
+	/**
+	 * For each mbuf byte, free_space[i] != 0 if space is free.
+	 * The value is the size of the biggest aligned element that
+	 * can fit in the zone.
+	 */
+	uint8_t free_space[sizeof(struct rte_mbuf)];
+	/** Bitfield of available flags. */
+	uint64_t free_flags;
+};
+static struct mbuf_dyn_shm *shm;
+
+/* Set the value of free_space[] according to the size and alignment of
+ * the free areas. This helps to select the best place when reserving a
+ * dynamic field. Assume tailq is locked.
+ */
+static void
+process_score(void)
+{
+	size_t off, align, size, i;
+
+	/* first, erase previous info */
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if (shm->free_space[i])
+			shm->free_space[i] = 1;
+	}
+
+	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
+		/* get the size of the free zone */
+		for (size = 0; shm->free_space[off + size]; size++)
+			;
+		if (size == 0)
+			continue;
+
+		/* get the alignment of biggest object that can fit in
+		 * the zone at this offset.
+		 */
+		for (align = 1;
+		     (off % (align << 1)) == 0 && (align << 1) <= size;
+		     align <<= 1)
+			;
+
+		/* save it in free_space[] */
+		for (i = off; i < off + size; i++)
+			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
+	}
+}
+
+/* Allocate and initialize the shared memory. Assume tailq is locked */
+static int
+init_shared_mem(void)
+{
+	const struct rte_memzone *mz;
+	uint64_t mask;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
+						sizeof(struct mbuf_dyn_shm),
+						SOCKET_ID_ANY, 0,
+						RTE_CACHE_LINE_SIZE);
+	} else {
+		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
+	}
+	if (mz == NULL)
+		return -1;
+
+	shm = mz->addr;
+
+#define mark_free(field)						\
+	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
+		1, sizeof(((struct rte_mbuf *)0)->field))
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* init free_space, keep it sync'd with
+		 * rte_mbuf_dynfield_copy().
+		 */
+		memset(shm, 0, sizeof(*shm));
+		mark_free(dynfield1);
+
+		/* init free_flags */
+		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
+			shm->free_flags |= mask;
+
+		process_score();
+	}
+#undef mark_free
+
+	return 0;
+}
+
+/* check if this offset can be used */
+static int
+check_offset(size_t offset, size_t size, size_t align)
+{
+	size_t i;
+
+	if ((offset & (align - 1)) != 0)
+		return -1;
+	if (offset + size > sizeof(struct rte_mbuf))
+		return -1;
+
+	for (i = 0; i < size; i++) {
+		if (!shm->free_space[i + offset])
+			return -1;
+	}
+
+	return 0;
+}
+
+/* assume tailq is locked */
+static struct mbuf_dynfield_elt *
+__mbuf_dynfield_lookup(const char *name)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+	struct rte_tailq_entry *te;
+
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
+		if (strcmp(name, mbuf_dynfield->params.name) == 0)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mbuf_dynfield;
+}
+
+int
+rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
+{
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+
+	if (shm == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	rte_mcfg_tailq_read_lock();
+	mbuf_dynfield = __mbuf_dynfield_lookup(name);
+	rte_mcfg_tailq_read_unlock();
+
+	if (mbuf_dynfield == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
+
+	return mbuf_dynfield->offset;
+}
+
+static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
+		const struct rte_mbuf_dynfield *params2)
+{
+	if (strcmp(params1->name, params2->name))
+		return -1;
+	if (params1->size != params2->size)
+		return -1;
+	if (params1->align != params2->align)
+		return -1;
+	if (params1->flags != params2->flags)
+		return -1;
+	return 0;
+}
+
+/* assume tailq is locked */
+static int
+__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int best_zone = UINT_MAX;
+	size_t i, offset;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
+	if (mbuf_dynfield != NULL) {
+		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynfield->offset;
+	}
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == SIZE_MAX) {
+		for (offset = 0;
+		     offset < sizeof(struct rte_mbuf);
+		     offset++) {
+			if (check_offset(offset, params->size,
+						params->align) == 0 &&
+					shm->free_space[offset] < best_zone) {
+				best_zone = shm->free_space[offset];
+				req = offset;
+			}
+		}
+		if (req == SIZE_MAX) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+	} else {
+		if (check_offset(req, params->size, params->align) < 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+	}
+
+	offset = req;
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL)
+		return -1;
+
+	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
+	if (mbuf_dynfield == NULL) {
+		rte_free(te);
+		return -1;
+	}
+
+	ret = strlcpy(mbuf_dynfield->params.name, params->name,
+		sizeof(mbuf_dynfield->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
+		rte_errno = ENAMETOOLONG;
+		rte_free(mbuf_dynfield);
+		rte_free(te);
+		return -1;
+	}
+	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
+	mbuf_dynfield->offset = offset;
+	te->data = mbuf_dynfield;
+
+	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
+
+	for (i = offset; i < offset + params->size; i++)
+		shm->free_space[i] = 0;
+	process_score();
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
+		params->name, params->size, params->align, params->flags,
+		offset);
+
+	return offset;
+}
+
+int
+rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	int ret;
+
+	if (params->size >= sizeof(struct rte_mbuf)) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (!rte_is_power_of_2(params->align)) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (params->flags != 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	ret = __rte_mbuf_dynfield_register_offset(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return ret;
+}
+
+int
+rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
+{
+	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
+}
+
+/* assume tailq is locked */
+static struct mbuf_dynflag_elt *
+__mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag;
+	struct rte_tailq_entry *te;
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
+		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
+		if (strncmp(name, mbuf_dynflag->params.name,
+				RTE_MBUF_DYN_NAMESIZE) == 0)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mbuf_dynflag;
+}
+
+int
+rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params)
+{
+	struct mbuf_dynflag_elt *mbuf_dynflag;
+
+	if (shm == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	rte_mcfg_tailq_read_lock();
+	mbuf_dynflag = __mbuf_dynflag_lookup(name);
+	rte_mcfg_tailq_read_unlock();
+
+	if (mbuf_dynflag == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
+
+	return mbuf_dynflag->bitnum;
+}
+
+static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
+		const struct rte_mbuf_dynflag *params2)
+{
+	if (strcmp(params1->name, params2->name))
+		return -1;
+	if (params1->flags != params2->flags)
+		return -1;
+	return 0;
+}
+
+/* assume tailq is locked */
+static int
+__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int bitnum;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
+	if (mbuf_dynflag != NULL) {
+		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynflag->bitnum;
+	}
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == UINT_MAX) {
+		if (shm->free_flags == 0) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+		bitnum = rte_bsf64(shm->free_flags);
+	} else {
+		if ((shm->free_flags & (1ULL << req)) == 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+		bitnum = req;
+	}
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL)
+		return -1;
+
+	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
+	if (mbuf_dynflag == NULL) {
+		rte_free(te);
+		return -1;
+	}
+
+	ret = strlcpy(mbuf_dynflag->params.name, params->name,
+		sizeof(mbuf_dynflag->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
+		rte_free(mbuf_dynflag);
+		rte_free(te);
+		rte_errno = ENAMETOOLONG;
+		return -1;
+	}
+	mbuf_dynflag->bitnum = bitnum;
+	te->data = mbuf_dynflag;
+
+	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
+
+	shm->free_flags &= ~(1ULL << bitnum);
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
+		params->name, params->flags, bitnum);
+
+	return bitnum;
+}
+
+int
+rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	int ret;
+
+	if (req != UINT_MAX && req >= 64) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return ret;
+}
+
+int
+rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
+{
+	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX);
+}
+
+void rte_mbuf_dyn_dump(FILE *out)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *dynfield;
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *dynflag;
+	struct rte_tailq_entry *te;
+	size_t i;
+
+	rte_mcfg_tailq_write_lock();
+	init_shared_mem();
+	fprintf(out, "Reserved fields:\n");
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		dynfield = (struct mbuf_dynfield_elt *)te->data;
+		fprintf(out, "  name=%s offset=%zd size=%zd align=%zd flags=%x\n",
+			dynfield->params.name, dynfield->offset,
+			dynfield->params.size, dynfield->params.align,
+			dynfield->params.flags);
+	}
+	fprintf(out, "Reserved flags:\n");
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
+		dynflag = (struct mbuf_dynflag_elt *)te->data;
+		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
+			dynflag->params.name, dynflag->bitnum,
+			dynflag->params.flags);
+	}
+	fprintf(out, "Free space in mbuf (0 = free, value = zone alignment):\n");
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if ((i % 8) == 0)
+			fprintf(out, "  %4.4zx: ", i);
+		fprintf(out, "%2.2x%s", shm->free_space[i],
+			(i % 8 != 7) ? " " : "\n");
+	}
+	rte_mcfg_tailq_write_unlock();
+}
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
new file mode 100644
index 000000000..307613c96
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#ifndef _RTE_MBUF_DYN_H_
+#define _RTE_MBUF_DYN_H_
+
+/**
+ * @file
+ * RTE Mbuf dynamic fields and flags
+ *
+ * Many features need to store data inside the mbuf. As the room in
+ * the mbuf structure is limited, it is not possible to have a field
+ * for each feature. Also, changing fields in the mbuf structure can
+ * break the API or ABI.
+ *
+ * This module addresses this issue, by enabling the dynamic
+ * registration of fields or flags:
+ *
+ * - a dynamic field is a named area in the rte_mbuf structure, with a
+ *   given size (>= 1 byte) and alignment constraint.
+ * - a dynamic flag is a named bit in the rte_mbuf structure, stored
+ *   in mbuf->ol_flags.
+ *
+ * The typical use case is when a specific offload feature needs to
+ * register a dedicated offload field in the mbuf structure, and adding
+ * a static field or flag is not justified.
+ *
+ * Example of use:
+ *
+ * - A rte_mbuf_dynfield structure is defined, containing the parameters
+ *   of the dynamic field to be registered:
+ *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
+ * - The application initializes the PMD, and asks for this feature
+ *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
+ *   rxconf. This makes the PMD register the field by calling
+ *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
+ *   stores the returned offset.
+ * - The application that uses the offload feature also registers
+ *   the field to retrieve the same offset.
+ * - When the PMD receives a packet, it can set the field:
+ *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
+ * - In the main loop, the application can retrieve the value with
+ *   the same macro.
+ *
+ * To avoid wasting space, the dynamic fields or flags must only be
+ * reserved on demand, when an application asks for the related feature.
+ *
+ * The registration can be done at any moment, but it is not possible
+ * to unregister fields or flags for now.
+ *
+ * A dynamic field can also be reserved and used solely by an
+ * application, for instance to carry a packet mark.
+ */
+
+#include <sys/types.h>
+/**
+ * Maximum size of a dynamic field or flag name, including the trailing \0.
+ */
+#define RTE_MBUF_DYN_NAMESIZE 64
+
+/**
+ * Structure describing the parameters of a mbuf dynamic field.
+ */
+struct rte_mbuf_dynfield {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
+	size_t size;        /**< The number of bytes to reserve. */
+	size_t align;       /**< The alignment constraint (power of 2). */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Structure describing the parameters of a mbuf dynamic flag.
+ */
+struct rte_mbuf_dynflag {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Register space for a dynamic field in the mbuf structure.
+ *
+ * If the field is already registered (same name and parameters), its
+ * offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, or flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
+
+/**
+ * Register space for a dynamic field in the mbuf structure at offset.
+ *
+ * If the field is already registered (same name, parameters and offset),
+ * the offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @param offset
+ *   The requested offset. Ignored if SIZE_MAX is passed.
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, flags, or offset).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested offset cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t offset);
+
+/**
+ * Look up a registered dynamic mbuf field.
+ *
+ * @param name
+ *   A string identifying the dynamic field.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic field.
+ * @return
+ *   The offset of this field in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic field matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_lookup(const char *name,
+			struct rte_mbuf_dynfield *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure.
+ *
+ * If the flag is already registered (same name and parameters), its
+ * bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure specifying bitnum.
+ *
+ * If the flag is already registered (same name, parameters and bitnum),
+ * the bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @param bitnum
+ *   The requested bitnum. Ignored if UINT_MAX is passed.
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (flags or bitnum).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested bitnum cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int bitnum);
+
+/**
+ * Look up a registered dynamic mbuf flag.
+ *
+ * @param name
+ *   A string identifying the dynamic flag.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic flag.
+ * @return
+ *   The bit number of this flag in mbuf->ol_flags, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic flag matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params);
+
+/**
+ * Helper macro to access a dynamic field.
+ */
+#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
+
+/**
+ * Dump the status of dynamic fields and flags.
+ *
+ * @param out
+ *   The stream where the status is displayed.
+ */
+__rte_experimental
+void rte_mbuf_dyn_dump(FILE *out);
+
+/* Placeholder for dynamic fields and flags declarations. */
+
+#endif
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index 519fead35..9bf5ca37a 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -58,6 +58,13 @@ EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_mbuf_dynfield_lookup;
+	rte_mbuf_dynfield_register;
+	rte_mbuf_dynfield_register_offset;
+	rte_mbuf_dynflag_lookup;
+	rte_mbuf_dynflag_register;
+	rte_mbuf_dynflag_register_bitnum;
+	rte_mbuf_dyn_dump;
 	rte_pktmbuf_copy;
 
 } DPDK_18.08;
-- 
2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
@ 2019-10-18  2:47   ` Wang, Haiyue
  2019-10-18  7:53     ` Olivier Matz
  2019-10-22 22:51   ` Ananyev, Konstantin
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-10-18  2:47 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Andrew Rybchenko, Richardson, Bruce, Jerin Jacob Kollanukkaran,
	Wiles, Keith, Ananyev, Konstantin, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon

Hi Olivier

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Thursday, October 17, 2019 22:42
> To: dev@dpdk.org
> Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: [PATCH v2] mbuf: support dynamic fields and flags
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> v2
> 
> * Rebase on top of master: solve conflict with Stephen's patchset
>   (packet copy)
> * Add new apis to register a dynamic field/flag at a specific place
> * Add a dump function (sugg by David)
> * Enhance field registration function to select the best offset, keeping
>   large aligned zones as much as possible (sugg by Konstantin)
> * Use a size_t and unsigned int instead of int when relevant
>   (sugg by Konstantin)
> * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
>   (sugg by Konstantin)
> * Remove unused argument in private function (sugg by Konstantin)
> * Fix and simplify locking (sugg by Konstantin)
> * Fix minor typo
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 145 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  23 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
>  8 files changed, 959 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> 
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index b9c2b2500..01cafad59 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -28,6 +28,7 @@
>  #include <rte_random.h>

[snip]

> +/**
> + * Helper macro to access to a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> +

Is the suggested macro missing? ;-)
	/**
	 * Helper macro to access to a dynamic flag.
	 */
	#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))


BTW, should we have a place that lists the registered dynamic field and flag
names together (a name overview, with each entry linking to the PMD's help page)?

Since rte_mbuf_dynfield:name & rte_mbuf_dynflag:name work as part of the API,
users could check how many names are registered, and developers could check
whether the names they want to use are already registered, without having
to check rte_errno ... Just a suggestion for user experience.

> 
>  } DPDK_18.08;
> --
> 2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-18  2:47   ` Wang, Haiyue
@ 2019-10-18  7:53     ` Olivier Matz
  2019-10-18  8:28       ` Wang, Haiyue
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-18  7:53 UTC (permalink / raw)
  To: Wang, Haiyue
  Cc: dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi Haiyue,

On Fri, Oct 18, 2019 at 02:47:50AM +0000, Wang, Haiyue wrote:
> Hi Olivier
> 
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Thursday, October 17, 2019 22:42
> > To: dev@dpdk.org
> > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > <thomas@monjalon.net>
> > Subject: [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > 
> > v2
> > 
> > * Rebase on top of master: solve conflict with Stephen's patchset
> >   (packet copy)
> > * Add new apis to register a dynamic field/flag at a specific place
> > * Add a dump function (sugg by David)
> > * Enhance field registration function to select the best offset, keeping
> >   large aligned zones as much as possible (sugg by Konstantin)
> > * Use a size_t and unsigned int instead of int when relevant
> >   (sugg by Konstantin)
> > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> >   (sugg by Konstantin)
> > * Remove unused argument in private function (sugg by Konstantin)
> > * Fix and simplify locking (sugg by Konstantin)
> > * Fix minor typo
> > 
> > rfc -> v1
> > 
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> > 
> >  app/test/test_mbuf.c                   | 145 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> >  8 files changed, 959 insertions(+), 5 deletions(-)
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > 
> > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > index b9c2b2500..01cafad59 100644
> > --- a/app/test/test_mbuf.c
> > +++ b/app/test/test_mbuf.c
> > @@ -28,6 +28,7 @@
> >  #include <rte_random.h>
> 
> [snip]
> 
> > +/**
> > + * Helper macro to access to a dynamic field.
> > + */
> > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> > +
> 
> The suggested macro is missed ? ;-)
> 	/**
> 	 * Helper macro to access to a dynamic flag.
> 	 */
> 	#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))

Yes, sorry.

Thinking a bit more about it, I wonder if the macros below aren't
more consistent with the dynamic field (because they take the mbuf
as parameter)?

  #define RTE_MBUF_SET_DYNFLAG(m, bitnum, val) ...
  #define RTE_MBUF_GET_DYNFLAG(m, bitnum) ...

They could even be static inline functions.

On the other hand, these helpers would be generic to ol_flags, not only
for dynamic flags. Today, we use (1ULL << bit) for ol_flags, which makes
me wonder... is the macro really needed after all? :)

> BTW, should we have a place to put the registered dynamic fields and flags
> names together (a name overview -- detail Link to --> PMD's help page) ? 

The centralized place will be rte_mbuf_dyn.h, for fields/flags that are
shared between several DPDK areas. Some libraries/PMDs could have private
dynamic fields/flags. In any case, I think the same namespace as for functions
should be used. Probably something like this:
 - "rte_mbuf_dynfield_<name>" in mbuf lib
 - "rte_<libname>_dynfield_<name>" in other libs
 - "rte_net_<pmd>_dynfield_<name>" in pmds
 - "<name>" in apps

> Since rte_mbuf_dynfield:name & rte_mbuf_dynflag:name work as a API style,
> users can check how many 'names' registered, developers can check whether
> the names they want to use are registered or not ? They don't need to have
> to check the rte_errno ... Just a suggestion for user experience.

I did not get your point. Does my response above answer your question?

Regards,
Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-18  7:53     ` Olivier Matz
@ 2019-10-18  8:28       ` Wang, Haiyue
  2019-10-18  9:47         ` Olivier Matz
  0 siblings, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-10-18  8:28 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Friday, October 18, 2019 15:54
> To: Wang, Haiyue <haiyue.wang@intel.com>
> Cc: dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> 
> Hi Haiyue,
> 
> On Fri, Oct 18, 2019 at 02:47:50AM +0000, Wang, Haiyue wrote:
> > Hi Olivier
> >
> > > -----Original Message-----
> > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > Sent: Thursday, October 17, 2019 22:42
> > > To: dev@dpdk.org
> > > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>;
> Wang,
> > > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > > <thomas@monjalon.net>
> > > Subject: [PATCH v2] mbuf: support dynamic fields and flags
> > >
> > > Many features require to store data inside the mbuf. As the room in mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > >
> > > v2
> > >
> > > * Rebase on top of master: solve conflict with Stephen's patchset
> > >   (packet copy)
> > > * Add new apis to register a dynamic field/flag at a specific place
> > > * Add a dump function (sugg by David)
> > > * Enhance field registration function to select the best offset, keeping
> > >   large aligned zones as much as possible (sugg by Konstantin)
> > > * Use a size_t and unsigned int instead of int when relevant
> > >   (sugg by Konstantin)
> > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > >   (sugg by Konstantin)
> > > * Remove unused argument in private function (sugg by Konstantin)
> > > * Fix and simplify locking (sugg by Konstantin)
> > > * Fix minor typo
> > >
> > > rfc -> v1
> > >
> > > * Rebase on top of master
> > > * Change registration API to use a structure instead of
> > >   variables, getting rid of #defines (Stephen's comment)
> > > * Update flag registration to use a similar API as fields.
> > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > * Add a debug log at registration
> > > * Add some words in release note
> > > * Did some performance tests (sugg. by Andrew):
> > >   On my platform, reading a dynamic field takes ~3 cycles more
> > >   than a static field, and ~2 cycles more for writing.
> > >
> > >  app/test/test_mbuf.c                   | 145 ++++++-
> > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > >  lib/librte_mbuf/Makefile               |   2 +
> > >  lib/librte_mbuf/meson.build            |   6 +-
> > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > >
> > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > index b9c2b2500..01cafad59 100644
> > > --- a/app/test/test_mbuf.c
> > > +++ b/app/test/test_mbuf.c
> > > @@ -28,6 +28,7 @@
> > >  #include <rte_random.h>
> >
> > [snip]
> >
> > > +/**
> > > + * Helper macro to access to a dynamic field.
> > > + */
> > > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> > > +
> >
> > The suggested macro is missed ? ;-)
> > 	/**
> > 	 * Helper macro to access to a dynamic flag.
> > 	 */
> > 	#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))
> 
> Yes, sorry.
> 
> Thinking a bit more about it, I wonder if the macros below aren't
> more consistent with the dynamic field (because they take the mbuf
> as parameter)?
> 
>   #define RTE_MBUF_SET_DYNFLAG(m, bitnum, val) ...
>   #define RTE_MBUF_GET_DYNFLAG(m, bitnum) ...
> 
> They could even be static inline functions.
> 
> On the other hand, these helpers would be generic to ol_flags, not only
> for dynamic flags. Today, we use (1ULL << bit) for ol_flags, which makes
> me wonder... is the macro really needed after all? :)
> 

I used it like this:
	1). 	in PMD:
		mb->ol_flags |= RTE_MBUF_DYNFLAG(ol_offset); 


	2). In testpmd
		if (mb->ol_flags & RTE_MBUF_DYNFLAG(ol_offset))
			...

The above two macros look better in real use.

> > BTW, should we have a place to put the registered dynamic fields and flags
> > names together (a name overview -- detail Link to --> PMD's help page) ?
> 
> The centralized place will be in rte_mbuf_dyn.h for fields/flags that can
> are shared between several dpdk areas. Some libraries/pmd could have private
> dynamic fields/flags. In any case, I think the same namespace than functions
> should be used. Probably something like this:
>  - "rte_mbuf_dynfield_<name>" in mbuf lib
>  - "rte_<libname>_dynfield_<name>" in other libs
>  - "rte_net_<pmd>_dynfield_<name>" in pmds
>  - "<name>" in apps
> 
> > Since rte_mbuf_dynfield:name & rte_mbuf_dynflag:name work as a API style,
> > users can check how many 'names' registered, developers can check whether
> > the names they want to use are registered or not ? They don't need to have
> > to check the rte_errno ... Just a suggestion for user experience.
> 
> I did not get you point. Does my response above answers to your question?
> 

Yes, the naming convention you mentioned above is a good practice, so no doc
is needed any more, thanks!

> Regards,
> Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-18  8:28       ` Wang, Haiyue
@ 2019-10-18  9:47         ` Olivier Matz
  2019-10-18 11:24           ` Wang, Haiyue
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-18  9:47 UTC (permalink / raw)
  To: Wang, Haiyue
  Cc: dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

On Fri, Oct 18, 2019 at 08:28:02AM +0000, Wang, Haiyue wrote:
> Hi Olivier,
> 
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Friday, October 18, 2019 15:54
> > To: Wang, Haiyue <haiyue.wang@intel.com>
> > Cc: dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > <thomas@monjalon.net>
> > Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > Hi Haiyue,
> > 
> > On Fri, Oct 18, 2019 at 02:47:50AM +0000, Wang, Haiyue wrote:
> > > Hi Olivier
> > >
> > > > -----Original Message-----
> > > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > > Sent: Thursday, October 17, 2019 22:42
> > > > To: dev@dpdk.org
> > > > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>;
> > Wang,
> > > > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > > > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > > > <thomas@monjalon.net>
> > > > Subject: [PATCH v2] mbuf: support dynamic fields and flags
> > > >
> > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > structure is limited, it is not possible to have a field for each
> > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > or ABI.
> > > >
> > > > This commit addresses these issues, by enabling the dynamic registration
> > > > of fields or flags:
> > > >
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > >   given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > >
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if it
> > > > is going to be used (i.e when the application explicitly asks for it).
> > > >
> > > > The registration can be done at any moment, but it is not possible
> > > > to unregister fields or flags for now.
> > > >
> > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > > ---
> > > >
> > > > v2
> > > >
> > > > * Rebase on top of master: solve conflict with Stephen's patchset
> > > >   (packet copy)
> > > > * Add new apis to register a dynamic field/flag at a specific place
> > > > * Add a dump function (sugg by David)
> > > > * Enhance field registration function to select the best offset, keeping
> > > >   large aligned zones as much as possible (sugg by Konstantin)
> > > > * Use a size_t and unsigned int instead of int when relevant
> > > >   (sugg by Konstantin)
> > > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > > >   (sugg by Konstantin)
> > > > * Remove unused argument in private function (sugg by Konstantin)
> > > > * Fix and simplify locking (sugg by Konstantin)
> > > > * Fix minor typo
> > > >
> > > > rfc -> v1
> > > >
> > > > * Rebase on top of master
> > > > * Change registration API to use a structure instead of
> > > >   variables, getting rid of #defines (Stephen's comment)
> > > > * Update flag registration to use a similar API as fields.
> > > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > > * Add a debug log at registration
> > > > * Add some words in release note
> > > > * Did some performance tests (sugg. by Andrew):
> > > >   On my platform, reading a dynamic field takes ~3 cycles more
> > > >   than a static field, and ~2 cycles more for writing.
> > > >
> > > >  app/test/test_mbuf.c                   | 145 ++++++-
> > > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > > >  lib/librte_mbuf/Makefile               |   2 +
> > > >  lib/librte_mbuf/meson.build            |   6 +-
> > > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > > >
> > > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > > index b9c2b2500..01cafad59 100644
> > > > --- a/app/test/test_mbuf.c
> > > > +++ b/app/test/test_mbuf.c
> > > > @@ -28,6 +28,7 @@
> > > >  #include <rte_random.h>
> > >
> > > [snip]
> > >
> > > > +/**
> > > > + * Helper macro to access to a dynamic field.
> > > > + */
> > > > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> > > > +
> > >
> > > The suggested macro is missed ? ;-)
> > > 	/**
> > > 	 * Helper macro to access to a dynamic flag.
> > > 	 */
> > > 	#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))
> > 
> > Yes, sorry.
> > 
> > Thinking a bit more about it, I wonder if the macros below aren't
> > more consistent with the dynamic field (because they take the mbuf
> > as parameter)?
> > 
> >   #define RTE_MBUF_SET_DYNFLAG(m, bitnum, val) ...
> >   #define RTE_MBUF_GET_DYNFLAG(m, bitnum) ...
> > 
> > They could even be static inline functions.
> > 
> > On the other hand, these helpers would be generic to ol_flags, not only
> > for dynamic flags. Today, we use (1ULL << bit) for ol_flags, which makes
> > me wonder... is the macro really needed after all? :)
> > 
> 
> I used as this:
> 	1). 	in PMD:
> 		mb->ol_flags |= RTE_MBUF_DYNFLAG(ol_offset); 
> 
> 
> 	2). In testpmd
> 		if (mb->ol_flags & RTE_MBUF_DYNFLAG(ol_offset))
> 			...
> 
> The above two macros look better in real use.

I just looked at http://patchwork.dpdk.org/patch/60908/
In the patch, a mask is used instead of a bit number, which is indeed
better in terms of performance. This makes the macro not that useful,
given there is a specific helper.


> > > BTW, should we have a place to put the registered dynamic fields and flags
> > > names together (a name overview -- detail Link to --> PMD's help page) ?
> > 
> > The centralized place will be in rte_mbuf_dyn.h for fields/flags that can
> > are shared between several dpdk areas. Some libraries/pmd could have private
> > dynamic fields/flags. In any case, I think the same namespace than functions
> > should be used. Probably something like this:
> >  - "rte_mbuf_dynfield_<name>" in mbuf lib
> >  - "rte_<libname>_dynfield_<name>" in other libs
> >  - "rte_net_<pmd>_dynfield_<name>" in pmds
> >  - "<name>" in apps
> > 
> > > Since rte_mbuf_dynfield:name & rte_mbuf_dynflag:name work as a API style,
> > > users can check how many 'names' registered, developers can check whether
> > > the names they want to use are registered or not ? They don't need to have
> > > to check the rte_errno ... Just a suggestion for user experience.
> > 
> > I did not get you point. Does my response above answers to your question?
> > 
> 
> Yes, the name conversation you mentioned above is a good practice, then no doc
> needed any more, thanks!
> 
> > Regards,
> > Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-18  9:47         ` Olivier Matz
@ 2019-10-18 11:24           ` Wang, Haiyue
  0 siblings, 0 replies; 64+ messages in thread
From: Wang, Haiyue @ 2019-10-18 11:24 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Friday, October 18, 2019 17:48
> To: Wang, Haiyue <haiyue.wang@intel.com>
> Cc: dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> 
> On Fri, Oct 18, 2019 at 08:28:02AM +0000, Wang, Haiyue wrote:
> > Hi Olivier,
> >
> > > -----Original Message-----
> > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > Sent: Friday, October 18, 2019 15:54
> > > To: Wang, Haiyue <haiyue.wang@intel.com>
> > > Cc: dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> > > <bruce.richardson@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > > <thomas@monjalon.net>
> > > Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> > >
> > > Hi Haiyue,
> > >
> > > On Fri, Oct 18, 2019 at 02:47:50AM +0000, Wang, Haiyue wrote:
> > > > Hi Olivier
> > > >
> > > > > -----Original Message-----
> > > > > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > > > > Sent: Thursday, October 17, 2019 22:42
> > > > > To: dev@dpdk.org
> > > > > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> <bruce.richardson@intel.com>;
> > > Wang,
> > > > > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > > > <keith.wiles@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>; Morten Brørup
> > > > > <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> > > > > <thomas@monjalon.net>
> > > > > Subject: [PATCH v2] mbuf: support dynamic fields and flags
> > > > >
> > > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > > structure is limited, it is not possible to have a field for each
> > > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > > or ABI.
> > > > >
> > > > > This commit addresses these issues, by enabling the dynamic registration
> > > > > of fields or flags:
> > > > >
> > > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > >   given size (>= 1 byte) and alignment constraint.
> > > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > >
> > > > > The typical use case is a PMD that registers space for an offload
> > > > > feature, when the application requests to enable this feature.  As
> > > > > the space in mbuf is limited, the space should only be reserved if it
> > > > > is going to be used (i.e when the application explicitly asks for it).
> > > > >
> > > > > The registration can be done at any moment, but it is not possible
> > > > > to unregister fields or flags for now.
> > > > >
> > > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > > > ---
> > > > >
> > > > > v2
> > > > >
> > > > > * Rebase on top of master: solve conflict with Stephen's patchset
> > > > >   (packet copy)
> > > > > * Add new apis to register a dynamic field/flag at a specific place
> > > > > * Add a dump function (sugg by David)
> > > > > * Enhance field registration function to select the best offset, keeping
> > > > >   large aligned zones as much as possible (sugg by Konstantin)
> > > > > * Use a size_t and unsigned int instead of int when relevant
> > > > >   (sugg by Konstantin)
> > > > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > > > >   (sugg by Konstantin)
> > > > > * Remove unused argument in private function (sugg by Konstantin)
> > > > > * Fix and simplify locking (sugg by Konstantin)
> > > > > * Fix minor typo
> > > > >
> > > > > rfc -> v1
> > > > >
> > > > > * Rebase on top of master
> > > > > * Change registration API to use a structure instead of
> > > > >   variables, getting rid of #defines (Stephen's comment)
> > > > > * Update flag registration to use a similar API as fields.
> > > > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > > > * Add a debug log at registration
> > > > > * Add some words in release note
> > > > > * Did some performance tests (sugg. by Andrew):
> > > > >   On my platform, reading a dynamic field takes ~3 cycles more
> > > > >   than a static field, and ~2 cycles more for writing.
> > > > >
> > > > >  app/test/test_mbuf.c                   | 145 ++++++-
> > > > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > > > >  lib/librte_mbuf/Makefile               |   2 +
> > > > >  lib/librte_mbuf/meson.build            |   6 +-
> > > > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > > > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > > > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > > > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > > > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > > > >
> > > > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > > > index b9c2b2500..01cafad59 100644
> > > > > --- a/app/test/test_mbuf.c
> > > > > +++ b/app/test/test_mbuf.c
> > > > > @@ -28,6 +28,7 @@
> > > > >  #include <rte_random.h>
> > > >
> > > > [snip]
> > > >
> > > > > +/**
> > > > > + * Helper macro to access to a dynamic field.
> > > > > + */
> > > > > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
> > > > > +
> > > >
> > > > The suggested macro is missed ? ;-)
> > > > 	/**
> > > > 	 * Helper macro to access to a dynamic flag.
> > > > 	 */
> > > > 	#define RTE_MBUF_DYNFLAG(offset) (1ULL << (offset))
> > >
> > > Yes, sorry.
> > >
> > > Thinking a bit more about it, I wonder if the macros below aren't
> > > more consistent with the dynamic field (because they take the mbuf
> > > as parameter)?
> > >
> > >   #define RTE_MBUF_SET_DYNFLAG(m, bitnum, val) ...
> > >   #define RTE_MBUF_GET_DYNFLAG(m, bitnum) ...
> > >
> > > They could even be static inline functions.
> > >
> > > On the other hand, these helpers would be generic to ol_flags, not only
> > > for dynamic flags. Today, we use (1ULL << bit) for ol_flags, which makes
> > > me wonder... is the macro really needed after all? :)
> > >
> >
> > I used as this:
> > 	1). 	in PMD:
> > 		mb->ol_flags |= RTE_MBUF_DYNFLAG(ol_offset);
> >
> >
> > 	2). In testpmd
> > 		if (mb->ol_flags & RTE_MBUF_DYNFLAG(ol_offset))
> > 			...
> >
> > The above two macros look better in real use.
> 
> I just looked at http://patchwork.dpdk.org/patch/60908/
> In the patch, a mask is used instead of a bit number, which is indeed
> better in terms of performance. This makes the macro not that useful,
> given there is a specific helper.
> 

'a mask is used instead of a bit number' is good practice, yes, so there is
no need for this macro, thanks for sharing. ;-)


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
  2019-10-18  2:47   ` Wang, Haiyue
@ 2019-10-22 22:51   ` Ananyev, Konstantin
  2019-10-23  3:16     ` Wang, Haiyue
  2019-10-23 10:19     ` Olivier Matz
  2019-10-23 12:00   ` Shahaf Shuler
  2019-10-24  7:38   ` Slava Ovsiienko
  3 siblings, 2 replies; 64+ messages in thread
From: Ananyev, Konstantin @ 2019-10-22 22:51 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Andrew Rybchenko, Richardson, Bruce, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon


> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> v2
> 
> * Rebase on top of master: solve conflict with Stephen's patchset
>   (packet copy)
> * Add new apis to register a dynamic field/flag at a specific place
> * Add a dump function (sugg by David)
> * Enhance field registration function to select the best offset, keeping
>   large aligned zones as much as possible (sugg by Konstantin)
> * Use a size_t and unsigned int instead of int when relevant
>   (sugg by Konstantin)
> * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
>   (sugg by Konstantin)
> * Remove unused argument in private function (sugg by Konstantin)
> * Fix and simplify locking (sugg by Konstantin)
> * Fix minor typo
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 145 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  23 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
>  8 files changed, 959 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
>  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> 
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> index b9c2b2500..01cafad59 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -28,6 +28,7 @@
>  #include <rte_random.h>
>  #include <rte_cycles.h>
>  #include <rte_malloc.h>
> +#include <rte_mbuf_dyn.h>
> 
>  #include "test.h"
> 
> @@ -657,7 +658,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
>  		rte_pktmbuf_free(clone2);
>  	return -1;
>  }
> -#undef GOTO_FAIL
> 
>  /*
>   * test allocation and free of mbufs
> @@ -1276,6 +1276,143 @@ test_tx_offload(void)
>  	return (v1 == v2) ? 0 : -EINVAL;
>  }
> 
> +static int
> +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
> +{
> +	const struct rte_mbuf_dynfield dynfield = {
> +		.name = "test-dynfield",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield2 = {
> +		.name = "test-dynfield2",
> +		.size = sizeof(uint16_t),
> +		.align = __alignof__(uint16_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield3 = {
> +		.name = "test-dynfield3",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_big = {
> +		.name = "test-dynfield-fail-big",
> +		.size = 256,
> +		.align = 1,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_align = {
> +		.name = "test-dynfield-fail-align",
> +		.size = 1,
> +		.align = 3,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag = {
> +		.name = "test-dynflag",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag2 = {
> +		.name = "test-dynflag2",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag3 = {
> +		.name = "test-dynflag3",
> +		.flags = 0,
> +	};
> +	struct rte_mbuf *m = NULL;
> +	int offset, offset2, offset3;
> +	int flag, flag2, flag3;
> +	int ret;
> +
> +	printf("Test mbuf dynamic fields and flags\n");
> +	rte_mbuf_dyn_dump(stdout);
> +
> +	offset = rte_mbuf_dynfield_register(&dynfield);
> +	if (offset == -1)
> +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> +			offset, strerror(errno));
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield);
> +	if (ret != offset)
> +		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	offset2 = rte_mbuf_dynfield_register(&dynfield2);
> +	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
> +		GOTO_FAIL("failed to register dynamic field 2, offset2=%d: %s",
> +			offset2, strerror(errno));
> +
> +	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
> +				offsetof(struct rte_mbuf, dynfield1[1]));
> +	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
> +		GOTO_FAIL("failed to register dynamic field 3, offset=%d: %s",
> +			offset3, strerror(errno));
> +
> +	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
> +		offset, offset2, offset3);
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (too big)");
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
> +
> +	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
> +				offsetof(struct rte_mbuf, ol_flags));
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (not avail)");
> +
> +	flag = rte_mbuf_dynflag_register(&dynflag);
> +	if (flag == -1)
> +		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
> +			flag, strerror(errno));
> +
> +	ret = rte_mbuf_dynflag_register(&dynflag);
> +	if (ret != flag)
> +		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	flag2 = rte_mbuf_dynflag_register(&dynflag2);
> +	if (flag2 == -1 || flag2 == flag)
> +		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
> +			flag2, strerror(errno));
> +
> +	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
> +						rte_bsf64(PKT_LAST_FREE));
> +	if (flag3 != rte_bsf64(PKT_LAST_FREE))
> +		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
> +			flag3, strerror(errno));
> +
> +	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
> +
> +	/* set, get dynamic field */
> +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> +	if (m == NULL)
> +		GOTO_FAIL("Cannot allocate mbuf");
> +
> +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> +		GOTO_FAIL("failed to read dynamic field");
> +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> +		GOTO_FAIL("failed to read dynamic field");
> +
> +	/* set a dynamic flag */
> +	m->ol_flags |= (1ULL << flag);
> +
> +	rte_mbuf_dyn_dump(stdout);
> +	rte_pktmbuf_free(m);
> +	return 0;
> +fail:
> +	rte_pktmbuf_free(m);
> +	return -1;
> +}
> +#undef GOTO_FAIL
> +
>  static int
>  test_mbuf(void)
>  {
> @@ -1295,6 +1432,12 @@ test_mbuf(void)
>  		goto err;
>  	}
> 
> +	/* test registration of dynamic fields and flags */
> +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> +		printf("mbuf dynflag test failed\n");
> +		goto err;
> +	}
> +
>  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
>  	 * room size */
>  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
> index 85953b962..9e9c94554 100644
> --- a/doc/guides/rel_notes/release_19_11.rst
> +++ b/doc/guides/rel_notes/release_19_11.rst
> @@ -21,6 +21,13 @@ DPDK Release 19.11
> 
>        xdg-open build/doc/html/guides/rel_notes/release_19_11.html
> 
> +* **Add support of dynamic fields and flags in mbuf.**
> +
> +  This new feature adds the ability to dynamically register some room
> +  for a field or a flag in the mbuf structure. This is typically used
> +  for specific offload features, where adding a static field or flag
> +  in the mbuf is not justified.
> +
> 
>  New Features
>  ------------
> diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
> index c8f6d2689..5a9bcee73 100644
> --- a/lib/librte_mbuf/Makefile
> +++ b/lib/librte_mbuf/Makefile
> @@ -17,8 +17,10 @@ LIBABIVER := 5
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> 
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
> index 6cc11ebb4..9137e8f26 100644
> --- a/lib/librte_mbuf/meson.build
> +++ b/lib/librte_mbuf/meson.build
> @@ -2,8 +2,10 @@
>  # Copyright(c) 2017 Intel Corporation
> 
>  version = 5
> -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
> -headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> +	'rte_mbuf_dyn.c')
> +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> +	'rte_mbuf_dyn.h')
>  deps += ['mempool']
> 
>  allow_experimental_apis = true
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index fb0849ac1..5740b1e93 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -198,9 +198,12 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> 
> -/* add new RX flags here */
> +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -/* add new TX flags here */
> +#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_LAST_FREE (1ULL << 39)
> +
> +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> 
>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,7 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
> 
> +	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
>  } __rte_cache_aligned;
> 
>  /**
> @@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>   */
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> 
> +/**
> + * Copy dynamic fields from m_src to m_dst.
> + *
> + * @param m_dst
> + *   The destination mbuf.
> + * @param m_src
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> +{
> +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
> +}
> +
>  /* internal */
>  static inline void
>  __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> @@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
>  	mdst->hash = msrc->hash;
>  	mdst->packet_type = msrc->packet_type;
>  	mdst->timestamp = msrc->timestamp;
> +	rte_mbuf_dynfield_copy(mdst, msrc);
>  }
> 
>  /**
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> new file mode 100644
> index 000000000..9ef235483
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -0,0 +1,548 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <limits.h>
> +
> +#include <rte_common.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_tailq.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_string_fns.h>
> +#include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
> +
> +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> +
> +struct mbuf_dynfield_elt {
> +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> +	struct rte_mbuf_dynfield params;
> +	size_t offset;
> +};
> +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> +	.name = "RTE_MBUF_DYNFIELD",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> +
> +struct mbuf_dynflag_elt {
> +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> +	struct rte_mbuf_dynflag params;
> +	unsigned int bitnum;
> +};
> +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> +	.name = "RTE_MBUF_DYNFLAG",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> +
> +struct mbuf_dyn_shm {
> +	/**
> +	 * For each mbuf byte, free_space[i] != 0 if space is free.
> +	 * The value is the size of the biggest aligned element that
> +	 * can fit in the zone.
> +	 */
> +	uint8_t free_space[sizeof(struct rte_mbuf)];
> +	/** Bitfield of available flags. */
> +	uint64_t free_flags;
> +};
> +static struct mbuf_dyn_shm *shm;
> +
> +/* Set the value of free_space[] according to the size and alignment of
> + * the free areas. This helps to select the best place when reserving a
> + * dynamic field. Assume tailq is locked.
> + */
> +static void
> +process_score(void)
> +{
> +	size_t off, align, size, i;
> +
> +	/* first, erase previous info */
> +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> +		if (shm->free_space[i])
> +			shm->free_space[i] = 1;
> +	}
> +
> +	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
> +		/* get the size of the free zone */
> +		for (size = 0; shm->free_space[off + size]; size++)
> +			;
> +		if (size == 0)
> +			continue;
> +
> +		/* get the alignment of biggest object that can fit in
> +		 * the zone at this offset.
> +		 */
> +		for (align = 1;
> +		     (off % (align << 1)) == 0 && (align << 1) <= size;
> +		     align <<= 1)
> +			;
> +
> +		/* save it in free_space[] */
> +		for (i = off; i < off + size; i++)
> +			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
> +	}
> +}
> +
> +/* Allocate and initialize the shared memory. Assume tailq is locked */
> +static int
> +init_shared_mem(void)
> +{
> +	const struct rte_memzone *mz;
> +	uint64_t mask;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> +						sizeof(struct mbuf_dyn_shm),
> +						SOCKET_ID_ANY, 0,
> +						RTE_CACHE_LINE_SIZE);
> +	} else {
> +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> +	}
> +	if (mz == NULL)
> +		return -1;
> +
> +	shm = mz->addr;
> +
> +#define mark_free(field)						\
> +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> +		1, sizeof(((struct rte_mbuf *)0)->field))

I still think it would look nicer without a multi-line macro #define/#undef in the middle of the function.

> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		/* init free_space, keep it sync'd with
> +		 * rte_mbuf_dynfield_copy().
> +		 */
> +		memset(shm, 0, sizeof(*shm));
> +		mark_free(dynfield1);
> +
> +		/* init free_flags */
> +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> +			shm->free_flags |= mask;
> +
> +		process_score();
> +	}
> +#undef mark_free
> +
> +	return 0;
> +}
> +
> +/* check if this offset can be used */
> +static int
> +check_offset(size_t offset, size_t size, size_t align)
> +{
> +	size_t i;
> +
> +	if ((offset & (align - 1)) != 0)
> +		return -1;
> +	if (offset + size > sizeof(struct rte_mbuf))
> +		return -1;
> +
> +	for (i = 0; i < size; i++) {
> +		if (!shm->free_space[i + offset])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynfield_elt *
> +__mbuf_dynfield_lookup(const char *name)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynfield;
> +}
> +
> +int
> +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> +{
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynfield == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> +
> +	return mbuf_dynfield->offset;
> +}
> +
> +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> +		const struct rte_mbuf_dynfield *params2)
> +{
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->size != params2->size)
> +		return -1;
> +	if (params1->align != params2->align)
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
> +				size_t req)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int best_zone = UINT_MAX;
> +	size_t i, offset;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> +	if (mbuf_dynfield != NULL) {
> +		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynfield->offset;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == SIZE_MAX) {
> +		for (offset = 0;
> +		     offset < sizeof(struct rte_mbuf);
> +		     offset++) {
> +			if (check_offset(offset, params->size,
> +						params->align) == 0 &&
> +					shm->free_space[offset] < best_zone) {

It is probably worth explaining the best_zone logic a bit more here -
trying to find the offset with the minimal score (minimal continuous length), etc.


> +				best_zone = shm->free_space[offset];
> +				req = offset;
> +			}
> +		}
> +		if (req == SIZE_MAX) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +	} else {
> +		if (check_offset(req, params->size, params->align) < 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +	}
> +
> +	offset = req;
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
> +	if (mbuf_dynfield == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> +		sizeof(mbuf_dynfield->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> +		rte_errno = ENAMETOOLONG;
> +		rte_free(mbuf_dynfield);
> +		rte_free(te);
> +		return -1;
> +	}
> +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
> +	mbuf_dynfield->offset = offset;
> +	te->data = mbuf_dynfield;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> +
> +	for (i = offset; i < offset + params->size; i++)
> +		shm->free_space[i] = 0;
> +	process_score();
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
> +		params->name, params->size, params->align, params->flags,
> +		offset);
> +
> +	return offset;
> +}
> +
> +int
> +rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
> +				size_t req)
> +{
> +	int ret;
> +
> +	if (params->size >= sizeof(struct rte_mbuf)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (!rte_is_power_of_2(params->align)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (params->flags != 0) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynfield_register_offset(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +
> +int
> +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
> +{
> +	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynflag_elt *
> +__mbuf_dynflag_lookup(const char *name)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		if (strncmp(name, mbuf_dynflag->params.name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynflag;
> +}
> +
> +int
> +rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params)
> +{
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynflag == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> +
> +	return mbuf_dynflag->bitnum;
> +}
> +
> +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> +		const struct rte_mbuf_dynflag *params2)
> +{
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> +				unsigned int req)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int bitnum;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> +	if (mbuf_dynflag != NULL) {
> +		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynflag->bitnum;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == UINT_MAX) {
> +		if (shm->free_flags == 0) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +		bitnum = rte_bsf64(shm->free_flags);
> +	} else {
> +		if ((shm->free_flags & (1ULL << req)) == 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +		bitnum = req;
> +	}
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
> +	if (mbuf_dynflag == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> +		sizeof(mbuf_dynflag->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> +		rte_free(mbuf_dynflag);
> +		rte_free(te);
> +		rte_errno = ENAMETOOLONG;
> +		return -1;
> +	}
> +	mbuf_dynflag->bitnum = bitnum;
> +	te->data = mbuf_dynflag;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> +
> +	shm->free_flags &= ~(1ULL << bitnum);
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
> +		params->name, params->flags, bitnum);
> +
> +	return bitnum;
> +}
> +
> +int
> +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> +				unsigned int req)
> +{
> +	int ret;
> +
> +	if (req != UINT_MAX && req >= 64) {

Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.
Apart from that:
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-22 22:51   ` Ananyev, Konstantin
@ 2019-10-23  3:16     ` Wang, Haiyue
  2019-10-23 10:21       ` Olivier Matz
  2019-10-23 10:19     ` Olivier Matz
  1 sibling, 1 reply; 64+ messages in thread
From: Wang, Haiyue @ 2019-10-23  3:16 UTC (permalink / raw)
  To: Ananyev, Konstantin, Olivier Matz, dev
  Cc: Andrew Rybchenko, Richardson, Bruce, Jerin Jacob Kollanukkaran,
	Wiles, Keith, Morten Brørup, Stephen Hemminger,
	Thomas Monjalon

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Wednesday, October 23, 2019 06:52
> To: Olivier Matz <olivier.matz@6wind.com>; dev@dpdk.org
> Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Morten Brørup <mb@smartsharesystems.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Thomas Monjalon <thomas@monjalon.net>
> Subject: RE: [PATCH v2] mbuf: support dynamic fields and flags
> 
> 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> >
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> >
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> >
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> >
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> >
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> >
> > v2
> >
> > * Rebase on top of master: solve conflict with Stephen's patchset
> >   (packet copy)
> > * Add new apis to register a dynamic field/flag at a specific place
> > * Add a dump function (sugg by David)
> > * Enhance field registration function to select the best offset, keeping
> >   large aligned zones as much as possible (sugg by Konstantin)
> > * Use a size_t and unsigned int instead of int when relevant
> >   (sugg by Konstantin)
> > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> >   (sugg by Konstantin)
> > * Remove unused argument in private function (sugg by Konstantin)
> > * Fix and simplify locking (sugg by Konstantin)
> > * Fix minor typo
> >
> > rfc -> v1
> >
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> >
> >  app/test/test_mbuf.c                   | 145 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> >  8 files changed, 959 insertions(+), 5 deletions(-)
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> >
> > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > index b9c2b2500..01cafad59 100644
> > --- a/app/test/test_mbuf.c
> > +++ b/app/test/test_mbuf.c
> > @@ -28,6 +28,7 @@
> >  #include <rte_random.h>
> >  #include <rte_cycles.h>
> >  #include <rte_malloc.h>
> > +#include <rte_mbuf_dyn.h>
> >

[snip]
> > +int
> > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > +				unsigned int req)
> > +{
> > +	int ret;
> > +
> > +	if (req != UINT_MAX && req >= 64) {
> 
> Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.

We might introduce a new macro like the kernel's:

/**
 * FIELD_SIZEOF - get the size of a struct's field
 * @t: the target struct
 * @f: the target struct's field
 * Return: the size of @f in the struct definition without having a
 * declared instance of @t.
 */
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))

Then: FIELD_SIZEOF(struct rte_mbuf, ol_flags) * CHAR_BIT

> Apart from that:
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return ret;
> > +}
> > +

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-22 22:51   ` Ananyev, Konstantin
  2019-10-23  3:16     ` Wang, Haiyue
@ 2019-10-23 10:19     ` Olivier Matz
  2019-10-23 11:45       ` Olivier Matz
  1 sibling, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-23 10:19 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, Andrew Rybchenko, Richardson, Bruce, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon

On Tue, Oct 22, 2019 at 10:51:51PM +0000, Ananyev, Konstantin wrote:
> 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each
> > feature. Also, changing fields in the mbuf structure can break the API
> > or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration
> > of fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload
> > feature, when the application requests to enable this feature.  As
> > the space in mbuf is limited, the space should only be reserved if it
> > is going to be used (i.e when the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible
> > to unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > 
> > v2
> > 
> > * Rebase on top of master: solve conflict with Stephen's patchset
> >   (packet copy)
> > * Add new apis to register a dynamic field/flag at a specific place
> > * Add a dump function (sugg by David)
> > * Enhance field registration function to select the best offset, keeping
> >   large aligned zones as much as possible (sugg by Konstantin)
> > * Use a size_t and unsigned int instead of int when relevant
> >   (sugg by Konstantin)
> > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> >   (sugg by Konstantin)
> > * Remove unused argument in private function (sugg by Konstantin)
> > * Fix and simplify locking (sugg by Konstantin)
> > * Fix minor typo
> > 
> > rfc -> v1
> > 
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> > 
> >  app/test/test_mbuf.c                   | 145 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> >  8 files changed, 959 insertions(+), 5 deletions(-)
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > 
> > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > index b9c2b2500..01cafad59 100644
> > --- a/app/test/test_mbuf.c
> > +++ b/app/test/test_mbuf.c
> > @@ -28,6 +28,7 @@
> >  #include <rte_random.h>
> >  #include <rte_cycles.h>
> >  #include <rte_malloc.h>
> > +#include <rte_mbuf_dyn.h>
> > 
> >  #include "test.h"
> > 
> > @@ -657,7 +658,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
> >  		rte_pktmbuf_free(clone2);
> >  	return -1;
> >  }
> > -#undef GOTO_FAIL
> > 
> >  /*
> >   * test allocation and free of mbufs
> > @@ -1276,6 +1276,143 @@ test_tx_offload(void)
> >  	return (v1 == v2) ? 0 : -EINVAL;
> >  }
> > 
> > +static int
> > +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
> > +{
> > +	const struct rte_mbuf_dynfield dynfield = {
> > +		.name = "test-dynfield",
> > +		.size = sizeof(uint8_t),
> > +		.align = __alignof__(uint8_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield2 = {
> > +		.name = "test-dynfield2",
> > +		.size = sizeof(uint16_t),
> > +		.align = __alignof__(uint16_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield3 = {
> > +		.name = "test-dynfield3",
> > +		.size = sizeof(uint8_t),
> > +		.align = __alignof__(uint8_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield_fail_big = {
> > +		.name = "test-dynfield-fail-big",
> > +		.size = 256,
> > +		.align = 1,
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield_fail_align = {
> > +		.name = "test-dynfield-fail-align",
> > +		.size = 1,
> > +		.align = 3,
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag = {
> > +		.name = "test-dynflag",
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag2 = {
> > +		.name = "test-dynflag2",
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag3 = {
> > +		.name = "test-dynflag3",
> > +		.flags = 0,
> > +	};
> > +	struct rte_mbuf *m = NULL;
> > +	int offset, offset2, offset3;
> > +	int flag, flag2, flag3;
> > +	int ret;
> > +
> > +	printf("Test mbuf dynamic fields and flags\n");
> > +	rte_mbuf_dyn_dump(stdout);
> > +
> > +	offset = rte_mbuf_dynfield_register(&dynfield);
> > +	if (offset == -1)
> > +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> > +			offset, strerror(errno));
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield);
> > +	if (ret != offset)
> > +		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
> > +			ret, strerror(errno));
> > +
> > +	offset2 = rte_mbuf_dynfield_register(&dynfield2);
> > +	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
> > +		GOTO_FAIL("failed to register dynamic field 2, offset2=%d: %s",
> > +			offset2, strerror(errno));
> > +
> > +	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
> > +				offsetof(struct rte_mbuf, dynfield1[1]));
> > +	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
> > +		GOTO_FAIL("failed to register dynamic field 3, offset=%d: %s",
> > +			offset3, strerror(errno));
> > +
> > +	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
> > +		offset, offset2, offset3);
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (too big)");
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
> > +
> > +	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
> > +				offsetof(struct rte_mbuf, ol_flags));
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (not avail)");
> > +
> > +	flag = rte_mbuf_dynflag_register(&dynflag);
> > +	if (flag == -1)
> > +		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
> > +			flag, strerror(errno));
> > +
> > +	ret = rte_mbuf_dynflag_register(&dynflag);
> > +	if (ret != flag)
> > +		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
> > +			ret, strerror(errno));
> > +
> > +	flag2 = rte_mbuf_dynflag_register(&dynflag2);
> > +	if (flag2 == -1 || flag2 == flag)
> > +		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
> > +			flag2, strerror(errno));
> > +
> > +	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
> > +						rte_bsf64(PKT_LAST_FREE));
> > +	if (flag3 != rte_bsf64(PKT_LAST_FREE))
> > +		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
> > +			flag3, strerror(errno));
> > +
> > +	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
> > +
> > +	/* set, get dynamic field */
> > +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> > +	if (m == NULL)
> > +		GOTO_FAIL("Cannot allocate mbuf");
> > +
> > +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> > +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> > +		GOTO_FAIL("failed to read dynamic field");
> > +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> > +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> > +		GOTO_FAIL("failed to read dynamic field");
> > +
> > +	/* set a dynamic flag */
> > +	m->ol_flags |= (1ULL << flag);
> > +
> > +	rte_mbuf_dyn_dump(stdout);
> > +	rte_pktmbuf_free(m);
> > +	return 0;
> > +fail:
> > +	rte_pktmbuf_free(m);
> > +	return -1;
> > +}
> > +#undef GOTO_FAIL
> > +
> >  static int
> >  test_mbuf(void)
> >  {
> > @@ -1295,6 +1432,12 @@ test_mbuf(void)
> >  		goto err;
> >  	}
> > 
> > +	/* test registration of dynamic fields and flags */
> > +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> > +		printf("mbuf dynflag test failed\n");
> > +		goto err;
> > +	}
> > +
> >  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
> >  	 * room size */
> >  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> > diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
> > index 85953b962..9e9c94554 100644
> > --- a/doc/guides/rel_notes/release_19_11.rst
> > +++ b/doc/guides/rel_notes/release_19_11.rst
> > @@ -21,6 +21,13 @@ DPDK Release 19.11
> > 
> >        xdg-open build/doc/html/guides/rel_notes/release_19_11.html
> > 
> > +* **Add support of dynamic fields and flags in mbuf.**
> > +
> > +  This new feature adds the ability to dynamically register some room
> > +  for a field or a flag in the mbuf structure. This is typically used
> > +  for specific offload features, where adding a static field or flag
> > +  in the mbuf is not justified.
> > +
> > 
> >  New Features
> >  ------------
> > diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
> > index c8f6d2689..5a9bcee73 100644
> > --- a/lib/librte_mbuf/Makefile
> > +++ b/lib/librte_mbuf/Makefile
> > @@ -17,8 +17,10 @@ LIBABIVER := 5
> > 
> >  # all source are stored in SRCS-y
> >  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> > 
> >  # install includes
> >  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> > 
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
> > index 6cc11ebb4..9137e8f26 100644
> > --- a/lib/librte_mbuf/meson.build
> > +++ b/lib/librte_mbuf/meson.build
> > @@ -2,8 +2,10 @@
> >  # Copyright(c) 2017 Intel Corporation
> > 
> >  version = 5
> > -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
> > -headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> > +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> > +	'rte_mbuf_dyn.c')
> > +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> > +	'rte_mbuf_dyn.h')
> >  deps += ['mempool']
> > 
> >  allow_experimental_apis = true
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index fb0849ac1..5740b1e93 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -198,9 +198,12 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > 
> > -/* add new RX flags here */
> > +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > 
> > -/* add new TX flags here */
> > +#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_LAST_FREE (1ULL << 39)
> > +
> > +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > 
> >  /**
> >   * Indicate that the metadata field in the mbuf is in use.
> > @@ -738,6 +741,7 @@ struct rte_mbuf {
> >  	 */
> >  	struct rte_mbuf_ext_shared_info *shinfo;
> > 
> > +	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
> >  } __rte_cache_aligned;
> > 
> >  /**
> > @@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
> >   */
> >  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> > 
> > +/**
> > + * Copy dynamic fields from m_src to m_dst.
> > + *
> > + * @param m_dst
> > + *   The destination mbuf.
> > + * @param m_src
> > + *   The source mbuf.
> > + */
> > +static inline void
> > +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> > +{
> > +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
> > +}
> > +
> >  /* internal */
> >  static inline void
> >  __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> > @@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> >  	mdst->hash = msrc->hash;
> >  	mdst->packet_type = msrc->packet_type;
> >  	mdst->timestamp = msrc->timestamp;
> > +	rte_mbuf_dynfield_copy(mdst, msrc);
> >  }
> > 
> >  /**
> > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> > new file mode 100644
> > index 000000000..9ef235483
> > --- /dev/null
> > +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> > @@ -0,0 +1,548 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + */
> > +
> > +#include <sys/queue.h>
> > +#include <stdint.h>
> > +#include <limits.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_eal.h>
> > +#include <rte_eal_memconfig.h>
> > +#include <rte_tailq.h>
> > +#include <rte_errno.h>
> > +#include <rte_malloc.h>
> > +#include <rte_string_fns.h>
> > +#include <rte_mbuf.h>
> > +#include <rte_mbuf_dyn.h>
> > +
> > +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> > +
> > +struct mbuf_dynfield_elt {
> > +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> > +	struct rte_mbuf_dynfield params;
> > +	size_t offset;
> > +};
> > +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> > +	.name = "RTE_MBUF_DYNFIELD",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> > +
> > +struct mbuf_dynflag_elt {
> > +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> > +	struct rte_mbuf_dynflag params;
> > +	unsigned int bitnum;
> > +};
> > +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> > +	.name = "RTE_MBUF_DYNFLAG",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> > +
> > +struct mbuf_dyn_shm {
> > +	/**
> > +	 * For each mbuf byte, free_space[i] != 0 if space is free.
> > +	 * The value is the size of the biggest aligned element that
> > +	 * can fit in the zone.
> > +	 */
> > +	uint8_t free_space[sizeof(struct rte_mbuf)];
> > +	/** Bitfield of available flags. */
> > +	uint64_t free_flags;
> > +};
> > +static struct mbuf_dyn_shm *shm;
> > +
> > +/* Set the value of free_space[] according to the size and alignment of
> > + * the free areas. This helps to select the best place when reserving a
> > + * dynamic field. Assume tailq is locked.
> > + */
> > +static void
> > +process_score(void)
> > +{
> > +	size_t off, align, size, i;
> > +
> > +	/* first, erase previous info */
> > +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> > +		if (shm->free_space[i])
> > +			shm->free_space[i] = 1;
> > +	}
> > +
> > +	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
> > +		/* get the size of the free zone */
> > +		for (size = 0; shm->free_space[off + size]; size++)
> > +			;
> > +		if (size == 0)
> > +			continue;
> > +
> > +		/* get the alignment of biggest object that can fit in
> > +		 * the zone at this offset.
> > +		 */
> > +		for (align = 1;
> > +		     (off % (align << 1)) == 0 && (align << 1) <= size;
> > +		     align <<= 1)
> > +			;
> > +
> > +		/* save it in free_space[] */
> > +		for (i = off; i < off + size; i++)
> > +			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
> > +	}
> > +}
> > +
> > +/* Allocate and initialize the shared memory. Assume tailq is locked */
> > +static int
> > +init_shared_mem(void)
> > +{
> > +	const struct rte_memzone *mz;
> > +	uint64_t mask;
> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > +						sizeof(struct mbuf_dyn_shm),
> > +						SOCKET_ID_ANY, 0,
> > +						RTE_CACHE_LINE_SIZE);
> > +	} else {
> > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > +	}
> > +	if (mz == NULL)
> > +		return -1;
> > +
> > +	shm = mz->addr;
> > +
> > +#define mark_free(field)						\
> > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > +		1, sizeof(((struct rte_mbuf *)0)->field))
> 
> Still think it would look nicer without multi-line macro defines/undef in the middle of the function.

I rather think that macro helps to make the code more readable, but it's
probably just a matter of taste. Will someone put a contract on me if I
keep it like this? If yes I'll do the change ;)


> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		/* init free_space, keep it sync'd with
> > +		 * rte_mbuf_dynfield_copy().
> > +		 */
> > +		memset(shm, 0, sizeof(*shm));
> > +		mark_free(dynfield1);
> > +
> > +		/* init free_flags */
> > +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
> > +			shm->free_flags |= mask;
> > +
> > +		process_score();
> > +	}
> > +#undef mark_free
> > +
> > +	return 0;
> > +}
> > +
> > +/* check if this offset can be used */
> > +static int
> > +check_offset(size_t offset, size_t size, size_t align)
> > +{
> > +	size_t i;
> > +
> > +	if ((offset & (align - 1)) != 0)
> > +		return -1;
> > +	if (offset + size > sizeof(struct rte_mbuf))
> > +		return -1;
> > +
> > +	for (i = 0; i < size; i++) {
> > +		if (!shm->free_space[i + offset])
> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynfield_elt *
> > +__mbuf_dynfield_lookup(const char *name)
> > +{
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> > +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynfield;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
> > +{
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynfield == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
> > +
> > +	return mbuf_dynfield->offset;
> > +}
> > +
> > +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> > +		const struct rte_mbuf_dynfield *params2)
> > +{
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->size != params2->size)
> > +		return -1;
> > +	if (params1->align != params2->align)
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static int
> > +__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
> > +				size_t req)
> > +{
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	unsigned int best_zone = UINT_MAX;
> > +	size_t i, offset;
> > +	int ret;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		return -1;
> > +
> > +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> > +	if (mbuf_dynfield != NULL) {
> > +		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		return mbuf_dynfield->offset;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		return -1;
> > +	}
> > +
> > +	if (req == SIZE_MAX) {
> > +		for (offset = 0;
> > +		     offset < sizeof(struct rte_mbuf);
> > +		     offset++) {
> > +			if (check_offset(offset, params->size,
> > +						params->align) == 0 &&
> > +					shm->free_space[offset] < best_zone) {
> 
> Probably worth to explain  a bit more here about best_zone logic -
> trying to find offset with minimal score (minimal continuous length), etc.

Yes, will do.


> > +				best_zone = shm->free_space[offset];
> > +				req = offset;
> > +			}
> > +		}
> > +		if (req == SIZE_MAX) {
> > +			rte_errno = ENOENT;
> > +			return -1;
> > +		}
> > +	} else {
> > +		if (check_offset(req, params->size, params->align) < 0) {
> > +			rte_errno = EBUSY;
> > +			return -1;
> > +		}
> > +	}
> > +
> > +	offset = req;
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		return -1;
> > +
> > +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
> > +	if (mbuf_dynfield == NULL) {
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +
> > +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> > +		sizeof(mbuf_dynfield->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> > +		rte_errno = ENAMETOOLONG;
> > +		rte_free(mbuf_dynfield);
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
> > +	mbuf_dynfield->offset = offset;
> > +	te->data = mbuf_dynfield;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> > +
> > +	for (i = offset; i < offset + params->size; i++)
> > +		shm->free_space[i] = 0;
> > +	process_score();
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
> > +		params->name, params->size, params->align, params->flags,
> > +		offset);
> > +
> > +	return offset;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
> > +				size_t req)
> > +{
> > +	int ret;
> > +
> > +	if (params->size >= sizeof(struct rte_mbuf)) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +	if (!rte_is_power_of_2(params->align)) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +	if (params->flags != 0) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +	ret = __rte_mbuf_dynfield_register_offset(params, req);
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return ret;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
> > +{
> > +	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
> > +}
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynflag_elt *
> > +__mbuf_dynflag_lookup(const char *name)
> > +{
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> > +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> > +		if (strncmp(name, mbuf_dynflag->params.name,
> > +				RTE_MBUF_DYN_NAMESIZE) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynflag;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_lookup(const char *name,
> > +			struct rte_mbuf_dynflag *params)
> > +{
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynflag == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> > +
> > +	return mbuf_dynflag->bitnum;
> > +}
> > +
> > +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> > +		const struct rte_mbuf_dynflag *params2)
> > +{
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static int
> > +__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > +				unsigned int req)
> > +{
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	unsigned int bitnum;
> > +	int ret;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		return -1;
> > +
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> > +	if (mbuf_dynflag != NULL) {
> > +		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		return mbuf_dynflag->bitnum;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		return -1;
> > +	}
> > +
> > +	if (req == UINT_MAX) {
> > +		if (shm->free_flags == 0) {
> > +			rte_errno = ENOENT;
> > +			return -1;
> > +		}
> > +		bitnum = rte_bsf64(shm->free_flags);
> > +	} else {
> > +		if ((shm->free_flags & (1ULL << req)) == 0) {
> > +			rte_errno = EBUSY;
> > +			return -1;
> > +		}
> > +		bitnum = req;
> > +	}
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		return -1;
> > +
> > +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
> > +	if (mbuf_dynflag == NULL) {
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +
> > +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> > +		sizeof(mbuf_dynflag->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> > +		rte_free(mbuf_dynflag);
> > +		rte_free(te);
> > +		rte_errno = ENAMETOOLONG;
> > +		return -1;
> > +	}
> > +	mbuf_dynflag->bitnum = bitnum;
> > +	te->data = mbuf_dynflag;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> > +
> > +	shm->free_flags &= ~(1ULL << bitnum);
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
> > +		params->name, params->flags, bitnum);
> > +
> > +	return bitnum;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > +				unsigned int req)
> > +{
> > +	int ret;
> > +
> > +	if (req != UINT_MAX && req >= 64) {
> 
> Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.

Will do.

> Apart from that:
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>


Thanks for the review
Olivier

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23  3:16     ` Wang, Haiyue
@ 2019-10-23 10:21       ` Olivier Matz
  2019-10-23 15:00         ` Stephen Hemminger
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-23 10:21 UTC (permalink / raw)
  To: Wang, Haiyue
  Cc: Ananyev, Konstantin, dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon

On Wed, Oct 23, 2019 at 03:16:13AM +0000, Wang, Haiyue wrote:
> > -----Original Message-----
> > From: Ananyev, Konstantin
> > Sent: Wednesday, October 23, 2019 06:52
> > To: Olivier Matz <olivier.matz@6wind.com>; dev@dpdk.org
> > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > <keith.wiles@intel.com>; Morten Brørup <mb@smartsharesystems.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Thomas Monjalon <thomas@monjalon.net>
> > Subject: RE: [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > 
> > > Many features require to store data inside the mbuf. As the room in mbuf
> > > structure is limited, it is not possible to have a field for each
> > > feature. Also, changing fields in the mbuf structure can break the API
> > > or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic registration
> > > of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if it
> > > is going to be used (i.e when the application explicitly asks for it).
> > >
> > > The registration can be done at any moment, but it is not possible
> > > to unregister fields or flags for now.
> > >
> > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > >
> > > v2
> > >
> > > * Rebase on top of master: solve conflict with Stephen's patchset
> > >   (packet copy)
> > > * Add new apis to register a dynamic field/flag at a specific place
> > > * Add a dump function (sugg by David)
> > > * Enhance field registration function to select the best offset, keeping
> > >   large aligned zones as much as possible (sugg by Konstantin)
> > > * Use a size_t and unsigned int instead of int when relevant
> > >   (sugg by Konstantin)
> > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > >   (sugg by Konstantin)
> > > * Remove unused argument in private function (sugg by Konstantin)
> > > * Fix and simplify locking (sugg by Konstantin)
> > > * Fix minor typo
> > >
> > > rfc -> v1
> > >
> > > * Rebase on top of master
> > > * Change registration API to use a structure instead of
> > >   variables, getting rid of #defines (Stephen's comment)
> > > * Update flag registration to use a similar API as fields.
> > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > * Add a debug log at registration
> > > * Add some words in release note
> > > * Did some performance tests (sugg. by Andrew):
> > >   On my platform, reading a dynamic field takes ~3 cycles more
> > >   than a static field, and ~2 cycles more for writing.
> > >
> > >  app/test/test_mbuf.c                   | 145 ++++++-
> > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > >  lib/librte_mbuf/Makefile               |   2 +
> > >  lib/librte_mbuf/meson.build            |   6 +-
> > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > >
> > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > index b9c2b2500..01cafad59 100644
> > > --- a/app/test/test_mbuf.c
> > > +++ b/app/test/test_mbuf.c
> > > @@ -28,6 +28,7 @@
> > >  #include <rte_random.h>
> > >  #include <rte_cycles.h>
> > >  #include <rte_malloc.h>
> > > +#include <rte_mbuf_dyn.h>
> > >
> 
> [snip]
> > > +int
> > > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > > +				unsigned int req)
> > > +{
> > > +	int ret;
> > > +
> > > +	if (req != UINT_MAX && req >= 64) {
> > 
> > Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.
> 
> Might introduce a new macro like kernel:
> 
> /**
>  * FIELD_SIZEOF - get the size of a struct's field
>  * @t: the target struct
>  * @f: the target struct's field
>  * Return: the size of @f in the struct definition without having a
>  * declared instance of @t.
>  */
> #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
> 
> Then: FIELD_SIZEOF(rte_mbuf, ol_flags) * CHAR_BIT

Good idea, thanks


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 10:19     ` Olivier Matz
@ 2019-10-23 11:45       ` Olivier Matz
  2019-10-23 11:49         ` Ananyev, Konstantin
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-23 11:45 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, Andrew Rybchenko, Richardson, Bruce, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon

On Wed, Oct 23, 2019 at 12:19:46PM +0200, Olivier Matz wrote:
> On Tue, Oct 22, 2019 at 10:51:51PM +0000, Ananyev, Konstantin wrote:

(...)

> > > +/* Allocate and initialize the shared memory. Assume tailq is locked */
> > > +static int
> > > +init_shared_mem(void)
> > > +{
> > > +	const struct rte_memzone *mz;
> > > +	uint64_t mask;
> > > +
> > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > > +						sizeof(struct mbuf_dyn_shm),
> > > +						SOCKET_ID_ANY, 0,
> > > +						RTE_CACHE_LINE_SIZE);
> > > +	} else {
> > > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > > +	}
> > > +	if (mz == NULL)
> > > +		return -1;
> > > +
> > > +	shm = mz->addr;
> > > +
> > > +#define mark_free(field)						\
> > > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > > +		1, sizeof(((struct rte_mbuf *)0)->field))
> > 
> > Still think it would look nicer without multi-line macro defines/undef in the middle of the function.
> 
> I rather think that macro helps to make the code more readable, but it's
> probably just a matter of taste. Will someone puts a contract on me if I
> keep it like this? If yes I'll do the change ;)

More seriously, do you prefer if I move the macro definition above the
function?

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 11:45       ` Olivier Matz
@ 2019-10-23 11:49         ` Ananyev, Konstantin
  0 siblings, 0 replies; 64+ messages in thread
From: Ananyev, Konstantin @ 2019-10-23 11:49 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Richardson, Bruce, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Stephen Hemminger, Thomas Monjalon



> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Wednesday, October 23, 2019 12:46 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith <keith.wiles@intel.com>; Morten
> Brørup <mb@smartsharesystems.com>; Stephen Hemminger <stephen@networkplumber.org>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> 
> On Wed, Oct 23, 2019 at 12:19:46PM +0200, Olivier Matz wrote:
> > On Tue, Oct 22, 2019 at 10:51:51PM +0000, Ananyev, Konstantin wrote:
> 
> (...)
> 
> > > > +/* Allocate and initialize the shared memory. Assume tailq is locked */
> > > > +static int
> > > > +init_shared_mem(void)
> > > > +{
> > > > +	const struct rte_memzone *mz;
> > > > +	uint64_t mask;
> > > > +
> > > > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > > > +		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > > > +						sizeof(struct mbuf_dyn_shm),
> > > > +						SOCKET_ID_ANY, 0,
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +	} else {
> > > > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > > > +	}
> > > > +	if (mz == NULL)
> > > > +		return -1;
> > > > +
> > > > +	shm = mz->addr;
> > > > +
> > > > +#define mark_free(field)						\
> > > > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > > > +		1, sizeof(((struct rte_mbuf *)0)->field))
> > >
> > > Still think it would look nicer without multi-line macro defines/undef in the middle of the function.
> >
> > I rather think that macro helps to make the code more readable, but it's
> > probably just a matter of taste. Will someone puts a contract on me if I
> > keep it like this? If yes I'll do the change ;)
> 
> More seriously, do you prefer if I move the macro definition above the
> function?

Yes, would look better to me.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
  2019-10-18  2:47   ` Wang, Haiyue
  2019-10-22 22:51   ` Ananyev, Konstantin
@ 2019-10-23 12:00   ` Shahaf Shuler
  2019-10-23 13:33     ` Olivier Matz
  2019-10-24  7:38   ` Slava Ovsiienko
  3 siblings, 1 reply; 64+ messages in thread
From: Shahaf Shuler @ 2019-10-23 12:00 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi Olivier, 

Thursday, October 17, 2019 5:42 PM, Olivier Matz:
> Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each feature. Also,
> changing fields in the mbuf structure can break the API or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration of
> fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload feature,
> when the application requests to enable this feature.  As the space in mbuf is
> limited, the space should only be reserved if it is going to be used (i.e when
> the application explicitly asks for it).

According to the description, dynamic fields enable custom applications and supported PMDs to use the dynamic part of the mbuf for their specific needs.
However, the mechanism to report and activate the field/flag registration comes from the general OFFLOAD flags.

Maybe it would be better to have an option to query and select dynamic fields for a PMD outside of the standard ethdev offload flags?

> 
> The registration can be done at any moment, but it is not possible to
> unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> v2
> 
> * Rebase on top of master: solve conflict with Stephen's patchset
>   (packet copy)
> * Add new apis to register a dynamic field/flag at a specific place
> * Add a dump function (sugg by David)
> * Enhance field registration function to select the best offset, keeping
>   large aligned zones as much as possible (sugg by Konstantin)
> * Use a size_t and unsigned int instead of int when relevant
>   (sugg by Konstantin)
> * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
>   (sugg by Konstantin)
> * Remove unused argument in private function (sugg by Konstantin)
> * Fix and simplify locking (sugg by Konstantin)
> * Fix minor typo
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 145 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  23 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
>  8 files changed, 959 insertions(+), 5 deletions(-)  create mode 100644
> lib/librte_mbuf/rte_mbuf_dyn.c  create mode 100644
> lib/librte_mbuf/rte_mbuf_dyn.h
> 
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index
> b9c2b2500..01cafad59 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -28,6 +28,7 @@
>  #include <rte_random.h>
>  #include <rte_cycles.h>
>  #include <rte_malloc.h>
> +#include <rte_mbuf_dyn.h>
> 
>  #include "test.h"
> 
> @@ -657,7 +658,6 @@ test_attach_from_different_pool(struct
> rte_mempool *pktmbuf_pool,
>  		rte_pktmbuf_free(clone2);
>  	return -1;
>  }
> -#undef GOTO_FAIL
> 
>  /*
>   * test allocation and free of mbufs
> @@ -1276,6 +1276,143 @@ test_tx_offload(void)
>  	return (v1 == v2) ? 0 : -EINVAL;
>  }
> 
> +static int
> +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) {
> +	const struct rte_mbuf_dynfield dynfield = {
> +		.name = "test-dynfield",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield2 = {
> +		.name = "test-dynfield2",
> +		.size = sizeof(uint16_t),
> +		.align = __alignof__(uint16_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield3 = {
> +		.name = "test-dynfield3",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_big = {
> +		.name = "test-dynfield-fail-big",
> +		.size = 256,
> +		.align = 1,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_align = {
> +		.name = "test-dynfield-fail-align",
> +		.size = 1,
> +		.align = 3,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag = {
> +		.name = "test-dynflag",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag2 = {
> +		.name = "test-dynflag2",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag3 = {
> +		.name = "test-dynflag3",
> +		.flags = 0,
> +	};
> +	struct rte_mbuf *m = NULL;
> +	int offset, offset2, offset3;
> +	int flag, flag2, flag3;
> +	int ret;
> +
> +	printf("Test mbuf dynamic fields and flags\n");
> +	rte_mbuf_dyn_dump(stdout);
> +
> +	offset = rte_mbuf_dynfield_register(&dynfield);
> +	if (offset == -1)
> +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> +			offset, strerror(errno));
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield);
> +	if (ret != offset)
> +		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	offset2 = rte_mbuf_dynfield_register(&dynfield2);
> +	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
> +		GOTO_FAIL("failed to register dynamic field 2, offset2=%d:
> %s",
> +			offset2, strerror(errno));
> +
> +	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
> +				offsetof(struct rte_mbuf, dynfield1[1]));
> +	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
> +		GOTO_FAIL("failed to register dynamic field 3, offset=%d:
> %s",
> +			offset3, strerror(errno));
> +
> +	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
> +		offset, offset2, offset3);
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (too big)");
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (bad
> alignment)");
> +
> +	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
> +				offsetof(struct rte_mbuf, ol_flags));
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (not avail)");
> +
> +	flag = rte_mbuf_dynflag_register(&dynflag);
> +	if (flag == -1)
> +		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
> +			flag, strerror(errno));
> +
> +	ret = rte_mbuf_dynflag_register(&dynflag);
> +	if (ret != flag)
> +		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	flag2 = rte_mbuf_dynflag_register(&dynflag2);
> +	if (flag2 == -1 || flag2 == flag)
> +		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
> +			flag2, strerror(errno));
> +
> +	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
> +						rte_bsf64(PKT_LAST_FREE));
> +	if (flag3 != rte_bsf64(PKT_LAST_FREE))
> +		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
> +			flag3, strerror(errno));
> +
> +	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
> +
> +	/* set, get dynamic field */
> +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> +	if (m == NULL)
> +		GOTO_FAIL("Cannot allocate mbuf");
> +
> +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> +		GOTO_FAIL("failed to read dynamic field");
> +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> +		GOTO_FAIL("failed to read dynamic field");
> +
> +	/* set a dynamic flag */
> +	m->ol_flags |= (1ULL << flag);
> +
> +	rte_mbuf_dyn_dump(stdout);
> +	rte_pktmbuf_free(m);
> +	return 0;
> +fail:
> +	rte_pktmbuf_free(m);
> +	return -1;
> +}
> +#undef GOTO_FAIL
> +
>  static int
>  test_mbuf(void)
>  {
> @@ -1295,6 +1432,12 @@ test_mbuf(void)
>  		goto err;
>  	}
> 
> +	/* test registration of dynamic fields and flags */
> +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> +		printf("mbuf dynflag test failed\n");
> +		goto err;
> +	}
> +
>  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
>  	 * room size */
>  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> diff --git a/doc/guides/rel_notes/release_19_11.rst
> b/doc/guides/rel_notes/release_19_11.rst
> index 85953b962..9e9c94554 100644
> --- a/doc/guides/rel_notes/release_19_11.rst
> +++ b/doc/guides/rel_notes/release_19_11.rst
> @@ -21,6 +21,13 @@ DPDK Release 19.11
> 
>        xdg-open build/doc/html/guides/rel_notes/release_19_11.html
> 
> +* **Add support of dynamic fields and flags in mbuf.**
> +
> +  This new feature adds the ability to dynamically register some room
> + for a field or a flag in the mbuf structure. This is typically used
> + for specific offload features, where adding a static field or flag  in
> + the mbuf is not justified.
> +
> 
>  New Features
>  ------------
> diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index
> c8f6d2689..5a9bcee73 100644
> --- a/lib/librte_mbuf/Makefile
> +++ b/lib/librte_mbuf/Makefile
> @@ -17,8 +17,10 @@ LIBABIVER := 5
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c
> rte_mbuf_pool_ops.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> 
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
> rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build index
> 6cc11ebb4..9137e8f26 100644
> --- a/lib/librte_mbuf/meson.build
> +++ b/lib/librte_mbuf/meson.build
> @@ -2,8 +2,10 @@
>  # Copyright(c) 2017 Intel Corporation
> 
>  version = 5
> -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c') -
> headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> +	'rte_mbuf_dyn.c')
> +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> +	'rte_mbuf_dyn.h')
>  deps += ['mempool']
> 
>  allow_experimental_apis = true
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> fb0849ac1..5740b1e93 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -198,9 +198,12 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> 
> -/* add new RX flags here */
> +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -/* add new TX flags here */
> +#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_LAST_FREE (1ULL << 39)
> +
> +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> 
>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,7 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
> 
> +	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
>  } __rte_cache_aligned;
> 
>  /**
> @@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m,
> void *buf_addr,
>   */
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> 
> +/**
> + * Copy dynamic fields from m_src to m_dst.
> + *
> + * @param m_dst
> + *   The destination mbuf.
> + * @param m_src
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf
> +*msrc) {
> +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst-
> >dynfield1)); }
> +
>  /* internal */
>  static inline void
>  __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf
> *msrc) @@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf
> *mdst, const struct rte_mbuf *msrc)
>  	mdst->hash = msrc->hash;
>  	mdst->packet_type = msrc->packet_type;
>  	mdst->timestamp = msrc->timestamp;
> +	rte_mbuf_dynfield_copy(mdst, msrc);
>  }
> 
>  /**
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c
> b/lib/librte_mbuf/rte_mbuf_dyn.c new file mode 100644 index
> 000000000..9ef235483
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -0,0 +1,548 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <limits.h>
> +
> +#include <rte_common.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_tailq.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_string_fns.h>
> +#include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
> +
> +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> +
> +struct mbuf_dynfield_elt {
> +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> +	struct rte_mbuf_dynfield params;
> +	size_t offset;
> +};
> +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> +	.name = "RTE_MBUF_DYNFIELD",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> +
> +struct mbuf_dynflag_elt {
> +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> +	struct rte_mbuf_dynflag params;
> +	unsigned int bitnum;
> +};
> +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> +	.name = "RTE_MBUF_DYNFLAG",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> +
> +struct mbuf_dyn_shm {
> +	/**
> +	 * For each mbuf byte, free_space[i] != 0 if space is free.
> +	 * The value is the size of the biggest aligned element that
> +	 * can fit in the zone.
> +	 */
> +	uint8_t free_space[sizeof(struct rte_mbuf)];
> +	/** Bitfield of available flags. */
> +	uint64_t free_flags;
> +};
> +static struct mbuf_dyn_shm *shm;
> +
> +/* Set the value of free_space[] according to the size and alignment of
> + * the free areas. This helps to select the best place when reserving a
> + * dynamic field. Assume tailq is locked.
> + */
> +static void
> +process_score(void)
> +{
> +	size_t off, align, size, i;
> +
> +	/* first, erase previous info */
> +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> +		if (shm->free_space[i])
> +			shm->free_space[i] = 1;
> +	}
> +
> +	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
> +		/* get the size of the free zone */
> +		for (size = 0; shm->free_space[off + size]; size++)
> +			;
> +		if (size == 0)
> +			continue;
> +
> +		/* get the alignment of biggest object that can fit in
> +		 * the zone at this offset.
> +		 */
> +		for (align = 1;
> +		     (off % (align << 1)) == 0 && (align << 1) <= size;
> +		     align <<= 1)
> +			;
> +
> +		/* save it in free_space[] */
> +		for (i = off; i < off + size; i++)
> +			shm->free_space[i] = RTE_MAX(align, shm-
> >free_space[i]);
> +	}
> +}
> +
> +/* Allocate and initialize the shared memory. Assume tailq is locked */
> +static int
> +init_shared_mem(void)
> +{
> +	const struct rte_memzone *mz;
> +	uint64_t mask;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		mz =
> rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> +						sizeof(struct
> mbuf_dyn_shm),
> +						SOCKET_ID_ANY, 0,
> +						RTE_CACHE_LINE_SIZE);
> +	} else {
> +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> +	}
> +	if (mz == NULL)
> +		return -1;
> +
> +	shm = mz->addr;
> +
> +#define mark_free(field)						\
> +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> +		1, sizeof(((struct rte_mbuf *)0)->field))
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		/* init free_space, keep it sync'd with
> +		 * rte_mbuf_dynfield_copy().
> +		 */
> +		memset(shm, 0, sizeof(*shm));
> +		mark_free(dynfield1);
> +
> +		/* init free_flags */
> +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask
> <<= 1)
> +			shm->free_flags |= mask;
> +
> +		process_score();
> +	}
> +#undef mark_free
> +
> +	return 0;
> +}
> +
> +/* check if this offset can be used */
> +static int
> +check_offset(size_t offset, size_t size, size_t align) {
> +	size_t i;
> +
> +	if ((offset & (align - 1)) != 0)
> +		return -1;
> +	if (offset + size > sizeof(struct rte_mbuf))
> +		return -1;
> +
> +	for (i = 0; i < size; i++) {
> +		if (!shm->free_space[i + offset])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynfield_elt *
> +__mbuf_dynfield_lookup(const char *name) {
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynfield;
> +}
> +
> +int
> +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield
> +*params) {
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynfield == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynfield->params,
> sizeof(*params));
> +
> +	return mbuf_dynfield->offset;
> +}
> +
> +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> +		const struct rte_mbuf_dynfield *params2) {
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->size != params2->size)
> +		return -1;
> +	if (params1->align != params2->align)
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t req)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int best_zone = UINT_MAX;
> +	size_t i, offset;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> +	if (mbuf_dynfield != NULL) {
> +		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) <
> 0) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynfield->offset;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == SIZE_MAX) {
> +		for (offset = 0;
> +		     offset < sizeof(struct rte_mbuf);
> +		     offset++) {
> +			if (check_offset(offset, params->size,
> +						params->align) == 0 &&
> +					shm->free_space[offset] <
> best_zone) {
> +				best_zone = shm->free_space[offset];
> +				req = offset;
> +			}
> +		}
> +		if (req == SIZE_MAX) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +	} else {
> +		if (check_offset(req, params->size, params->align) < 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +	}
> +
> +	offset = req;
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield",
> sizeof(*mbuf_dynfield), 0);
> +	if (mbuf_dynfield == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> +		sizeof(mbuf_dynfield->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> +		rte_errno = ENAMETOOLONG;
> +		rte_free(mbuf_dynfield);
> +		rte_free(te);
> +		return -1;
> +	}
> +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield-
> >params));
> +	mbuf_dynfield->offset = offset;
> +	te->data = mbuf_dynfield;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> +
> +	for (i = offset; i < offset + params->size; i++)
> +		shm->free_space[i] = 0;
> +	process_score();
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu,
> al=%zu, fl=0x%x) -> %zd\n",
> +		params->name, params->size, params->align, params->flags,
> +		offset);
> +
> +	return offset;
> +}
> +
> +int
> +rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t req)
> +{
> +	int ret;
> +
> +	if (params->size >= sizeof(struct rte_mbuf)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (!rte_is_power_of_2(params->align)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (params->flags != 0) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynfield_register_offset(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +
> +int
> +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params) {
> +	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX); }
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynflag_elt *
> +__mbuf_dynflag_lookup(const char *name) {
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		if (strncmp(name, mbuf_dynflag->params.name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynflag;
> +}
> +
> +int
> +rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params)
> +{
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynflag == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynflag->params,
> sizeof(*params));
> +
> +	return mbuf_dynflag->bitnum;
> +}
> +
> +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> +		const struct rte_mbuf_dynflag *params2) {
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> *params,
> +				unsigned int req)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int bitnum;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> +	if (mbuf_dynflag != NULL) {
> +		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) <
> 0) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynflag->bitnum;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == UINT_MAX) {
> +		if (shm->free_flags == 0) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +		bitnum = rte_bsf64(shm->free_flags);
> +	} else {
> +		if ((shm->free_flags & (1ULL << req)) == 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +		bitnum = req;
> +	}
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag",
> sizeof(*mbuf_dynflag), 0);
> +	if (mbuf_dynflag == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> +		sizeof(mbuf_dynflag->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> +		rte_free(mbuf_dynflag);
> +		rte_free(te);
> +		rte_errno = ENAMETOOLONG;
> +		return -1;
> +	}
> +	mbuf_dynflag->bitnum = bitnum;
> +	te->data = mbuf_dynflag;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> +
> +	shm->free_flags &= ~(1ULL << bitnum);
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) ->
> %u\n",
> +		params->name, params->flags, bitnum);
> +
> +	return bitnum;
> +}
> +
> +int
> +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> *params,
> +				unsigned int req)
> +{
> +	int ret;
> +
> +	if (req != UINT_MAX && req >= 64) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +
> +int
> +rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params) {
> +	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX); }
> +
> +void rte_mbuf_dyn_dump(FILE *out)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *dynfield;
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *dynflag;
> +	struct rte_tailq_entry *te;
> +	size_t i;
> +
> +	rte_mcfg_tailq_write_lock();
> +	init_shared_mem();
> +	fprintf(out, "Reserved fields:\n");
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		fprintf(out, "  name=%s offset=%zd size=%zd align=%zd
> flags=%x\n",
> +			dynfield->params.name, dynfield->offset,
> +			dynfield->params.size, dynfield->params.align,
> +			dynfield->params.flags);
> +	}
> +	fprintf(out, "Reserved flags:\n");
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
> +			dynflag->params.name, dynflag->bitnum,
> +			dynflag->params.flags);
> +	}
> +	fprintf(out, "Free space in mbuf (0 = free, value = zone
> alignment):\n");
> +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> +		if ((i % 8) == 0)
> +			fprintf(out, "  %4.4zx: ", i);
> +		fprintf(out, "%2.2x%s", shm->free_space[i],
> +			(i % 8 != 7) ? " " : "\n");
> +	}
> +	rte_mcfg_tailq_write_unlock();
> +}
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h
> b/lib/librte_mbuf/rte_mbuf_dyn.h new file mode 100644 index
> 000000000..307613c96
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> @@ -0,0 +1,226 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#ifndef _RTE_MBUF_DYN_H_
> +#define _RTE_MBUF_DYN_H_
> +
> +/**
> + * @file
> + * RTE Mbuf dynamic fields and flags
> + *
> + * Many features require to store data inside the mbuf. As the room in
> + * mbuf structure is limited, it is not possible to have a field for
> + * each feature. Also, changing fields in the mbuf structure can break
> + * the API or ABI.
> + *
> + * This module addresses this issue, by enabling the dynamic
> + * registration of fields or flags:
> + *
> + * - a dynamic field is a named area in the rte_mbuf structure, with a
> + *   given size (>= 1 byte) and alignment constraint.
> + * - a dynamic flag is a named bit in the rte_mbuf structure, stored
> + *   in mbuf->ol_flags.
> + *
> + * The typical use case is when a specific offload feature requires to
> + * register a dedicated offload field in the mbuf structure, and adding
> + * a static field or flag is not justified.
> + *
> + * Example of use:
> + *
> + * - A rte_mbuf_dynfield structure is defined, containing the parameters
> + *   of the dynamic field to be registered:
> + *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
> + * - The application initializes the PMD, and asks for this feature
> + *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
> + *   rxconf. This will make the PMD to register the field by calling
> + *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
> + *   stores the returned offset.
> + * - The application that uses the offload feature also registers
> + *   the field to retrieve the same offset.
> + * - When the PMD receives a packet, it can set the field:
> + *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
> + * - In the main loop, the application can retrieve the value with
> + *   the same macro.
> + *
> + * To avoid wasting space, the dynamic fields or flags must only be
> + * reserved on demand, when an application asks for the related feature.
> + *
> + * The registration can be done at any moment, but it is not possible
> + * to unregister fields or flags for now.
> + *
> + * A dynamic field can be reserved and used by an application only.
> + * It can for instance be a packet mark.
> + */
> +
> +#include <sys/types.h>
> +/**
> + * Maximum length of the dynamic field or flag string.
> + */
> +#define RTE_MBUF_DYN_NAMESIZE 64
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic field.
> + */
> +struct rte_mbuf_dynfield {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
> +	size_t size;        /**< The number of bytes to reserve. */
> +	size_t align;       /**< The alignment constraint (power of 2). */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic flag.
> + */
> +struct rte_mbuf_dynflag {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic
> flag. */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure.
> + *
> + * If the field is already registered (same name and parameters), its
> + * offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters (name, size,
> + *   alignment constraint and flags).
> + * @return
> + *   The offset in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: not enough room in mbuf.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name does not end with \0.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure at offset.
> + *
> + * If the field is already registered (same name, parameters and
> +offset),
> + * the offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters (name, size,
> + *   alignment constraint and flags).
> + * @param offset
> + *   The requested offset. Ignored if SIZE_MAX is passed.
> + * @return
> + *   The offset in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, flags, or offset).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EBUSY: the requested offset cannot be used.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: not enough room in mbuf.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name does not end with \0.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t offset);
> +
> +/**
> + * Lookup for a registered dynamic mbuf field.
> + *
> + * @param name
> + *   A string identifying the dynamic field.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic field.
> + * @return
> + *   The offset of this field in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic field matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_lookup(const char *name,
> +			struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure.
> + *
> + * If the flag is already registered (same name and parameters), its
> + * bitnum is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters of the dynamic
> + *   flag (name and options).
> + * @return
> + *   The number of the reserved bit, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: no more flag available.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure specifying bitnum.
> + *
> + * If the flag is already registered (same name, parameters and bitnum),
> + * the bitnum is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters of the dynamic
> + *   flag (name and options).
> + * @param bitnum
> + *   The requested bitnum. Ignored if UINT_MAX is passed.
> + * @return
> + *   The number of the reserved bit, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EBUSY: the requested bitnum cannot be used.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: no more flag available.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> *params,
> +				unsigned int bitnum);
> +
> +/**
> + * Lookup for a registered dynamic mbuf flag.
> + *
> + * @param name
> + *   A string identifying the dynamic flag.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic flag.
> + * @return
> + *   The offset of this flag in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic flag matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Helper macro to access to a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) +
> +(offset)))
> +
> +/**
> + * Dump the status of dynamic fields and flags.
> + *
> + * @param out
> + *   The stream where the status is displayed.
> + */
> +__rte_experimental
> +void rte_mbuf_dyn_dump(FILE *out);
> +
> +/* Placeholder for dynamic fields and flags declarations. */
> +
> +#endif
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map
> b/lib/librte_mbuf/rte_mbuf_version.map
> index 519fead35..9bf5ca37a 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -58,6 +58,13 @@ EXPERIMENTAL {
>  	global:
> 
>  	rte_mbuf_check;
> +	rte_mbuf_dynfield_lookup;
> +	rte_mbuf_dynfield_register;
> +	rte_mbuf_dynfield_register_offset;
> +	rte_mbuf_dynflag_lookup;
> +	rte_mbuf_dynflag_register;
> +	rte_mbuf_dynflag_register_bitnum;
> +	rte_mbuf_dyn_dump;
>  	rte_pktmbuf_copy;
> 
>  } DPDK_18.08;
> --
> 2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 12:00   ` Shahaf Shuler
@ 2019-10-23 13:33     ` Olivier Matz
  2019-10-24  4:54       ` Shahaf Shuler
  0 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-23 13:33 UTC (permalink / raw)
  To: Shahaf Shuler
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi Shahaf,

On Wed, Oct 23, 2019 at 12:00:30PM +0000, Shahaf Shuler wrote:
> Hi Olivier, 
> 
> Thursday, October 17, 2019 5:42 PM, Olivier Matz:
> > Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each feature. Also,
> > changing fields in the mbuf structure can break the API or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration of
> > fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload feature,
> > when the application requests to enable this feature.  As the space in mbuf is
> > limited, the space should only be reserved if it is going to be used (i.e when
> > the application explicitly asks for it).
> 
> According to description, the dynamic field enables custom application and supported PMDs to use the dynamic part of the mbuf for their specific needs. 
> However the mechanism to report and activate the field/flag registration comes from the general OFFLOAD flags. 
> 
> Maybe it would be better to have an option to query and select dynamic fields for the PMD outside of the standard ethdev offload flags?

It is not mandatory to use the ethdev layer to register a dynamic field
or flag in the mbuf. It is just the typical use case.

It can also be enabled when using a library that has specific needs,
for instance, you call rte_reorder_init(), and it will register the
sequence number dynamic field.

An application that requires a specific mbuf field can also do the
registration by itself.

In other words, when you initialize a subpart that needs a dynamic field
or flag, you have to do the registration there.



> 
> > 
> > The registration can be done at any moment, but it is not possible to
> > unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 10:21       ` Olivier Matz
@ 2019-10-23 15:00         ` Stephen Hemminger
  2019-10-23 15:12           ` Wang, Haiyue
  0 siblings, 1 reply; 64+ messages in thread
From: Stephen Hemminger @ 2019-10-23 15:00 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Wang, Haiyue, Ananyev, Konstantin, dev, Andrew Rybchenko,
	Richardson, Bruce, Jerin Jacob Kollanukkaran, Wiles, Keith,
	Morten Brørup, Thomas Monjalon

On Wed, 23 Oct 2019 12:21:43 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

> On Wed, Oct 23, 2019 at 03:16:13AM +0000, Wang, Haiyue wrote:
> > > -----Original Message-----
> > > From: Ananyev, Konstantin
> > > Sent: Wednesday, October 23, 2019 06:52
> > > To: Olivier Matz <olivier.matz@6wind.com>; dev@dpdk.org
> > > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>; Wang,
> > > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > <keith.wiles@intel.com>; Morten Brørup <mb@smartsharesystems.com>; Stephen Hemminger
> > > <stephen@networkplumber.org>; Thomas Monjalon <thomas@monjalon.net>
> > > Subject: RE: [PATCH v2] mbuf: support dynamic fields and flags
> > > 
> > >   
> > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > structure is limited, it is not possible to have a field for each
> > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > or ABI.
> > > >
> > > > This commit addresses these issues, by enabling the dynamic registration
> > > > of fields or flags:
> > > >
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > >   given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > >
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if it
> > > > is going to be used (i.e when the application explicitly asks for it).
> > > >
> > > > The registration can be done at any moment, but it is not possible
> > > > to unregister fields or flags for now.
> > > >
> > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > > ---
> > > >
> > > > v2
> > > >
> > > > * Rebase on top of master: solve conflict with Stephen's patchset
> > > >   (packet copy)
> > > > * Add new apis to register a dynamic field/flag at a specific place
> > > > * Add a dump function (sugg by David)
> > > > * Enhance field registration function to select the best offset, keeping
> > > >   large aligned zones as much as possible (sugg by Konstantin)
> > > > * Use a size_t and unsigned int instead of int when relevant
> > > >   (sugg by Konstantin)
> > > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > > >   (sugg by Konstantin)
> > > > * Remove unused argument in private function (sugg by Konstantin)
> > > > * Fix and simplify locking (sugg by Konstantin)
> > > > * Fix minor typo
> > > >
> > > > rfc -> v1
> > > >
> > > > * Rebase on top of master
> > > > * Change registration API to use a structure instead of
> > > >   variables, getting rid of #defines (Stephen's comment)
> > > > * Update flag registration to use a similar API as fields.
> > > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > > * Add a debug log at registration
> > > > * Add some words in release note
> > > > * Did some performance tests (sugg. by Andrew):
> > > >   On my platform, reading a dynamic field takes ~3 cycles more
> > > >   than a static field, and ~2 cycles more for writing.
> > > >
> > > >  app/test/test_mbuf.c                   | 145 ++++++-
> > > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > > >  lib/librte_mbuf/Makefile               |   2 +
> > > >  lib/librte_mbuf/meson.build            |   6 +-
> > > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > > >
> > > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > > index b9c2b2500..01cafad59 100644
> > > > --- a/app/test/test_mbuf.c
> > > > +++ b/app/test/test_mbuf.c
> > > > @@ -28,6 +28,7 @@
> > > >  #include <rte_random.h>
> > > >  #include <rte_cycles.h>
> > > >  #include <rte_malloc.h>
> > > > +#include <rte_mbuf_dyn.h>
> > > >  
> > 
> > [snip]  
> > > > +int
> > > > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > > > +				unsigned int req)
> > > > +{
> > > > +	int ret;
> > > > +
> > > > +	if (req != UINT_MAX && req >= 64) {  
> > > 
> > > Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.  
> > 
> > Might introduce a new macro like kernel:
> > 
> > /**
> >  * FIELD_SIZEOF - get the size of a struct's field
> >  * @t: the target struct
> >  * @f: the target struct's field
> >  * Return: the size of @f in the struct definition without having a
> >  * declared instance of @t.
> >  */
> > #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
> > 
> > Then: FIELD_SIZEOF(rte_mbuf, ol_flags) * CHAR_BIT  
> 
> Good idea, thanks
> 

Kernel is replacing FIELD_SIZEOF with sizeof_member

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 15:00         ` Stephen Hemminger
@ 2019-10-23 15:12           ` Wang, Haiyue
  0 siblings, 0 replies; 64+ messages in thread
From: Wang, Haiyue @ 2019-10-23 15:12 UTC (permalink / raw)
  To: Stephen Hemminger, Olivier Matz
  Cc: Ananyev, Konstantin, dev, Andrew Rybchenko, Richardson, Bruce,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Morten Brørup,
	Thomas Monjalon

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, October 23, 2019 23:00
> To: Olivier Matz <olivier.matz@6wind.com>
> Cc: Wang, Haiyue <haiyue.wang@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> dev@dpdk.org; Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Morten Brørup <mb@smartsharesystems.com>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: Re: [PATCH v2] mbuf: support dynamic fields and flags
> 
> On Wed, 23 Oct 2019 12:21:43 +0200
> Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> > On Wed, Oct 23, 2019 at 03:16:13AM +0000, Wang, Haiyue wrote:
> > > > -----Original Message-----
> > > > From: Ananyev, Konstantin
> > > > Sent: Wednesday, October 23, 2019 06:52
> > > > To: Olivier Matz <olivier.matz@6wind.com>; dev@dpdk.org
> > > > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Richardson, Bruce <bruce.richardson@intel.com>;
> Wang,
> > > > Haiyue <haiyue.wang@intel.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > > > <keith.wiles@intel.com>; Morten Brørup <mb@smartsharesystems.com>; Stephen Hemminger
> > > > <stephen@networkplumber.org>; Thomas Monjalon <thomas@monjalon.net>
> > > > Subject: RE: [PATCH v2] mbuf: support dynamic fields and flags
> > > >
> > > >
> > > > > Many features require to store data inside the mbuf. As the room in mbuf
> > > > > structure is limited, it is not possible to have a field for each
> > > > > feature. Also, changing fields in the mbuf structure can break the API
> > > > > or ABI.
> > > > >
> > > > > This commit addresses these issues, by enabling the dynamic registration
> > > > > of fields or flags:
> > > > >
> > > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > > >   given size (>= 1 byte) and alignment constraint.
> > > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > > >
> > > > > The typical use case is a PMD that registers space for an offload
> > > > > feature, when the application requests to enable this feature.  As
> > > > > the space in mbuf is limited, the space should only be reserved if it
> > > > > is going to be used (i.e when the application explicitly asks for it).
> > > > >
> > > > > The registration can be done at any moment, but it is not possible
> > > > > to unregister fields or flags for now.
> > > > >
> > > > > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > > > > ---
> > > > >
> > > > > v2
> > > > >
> > > > > * Rebase on top of master: solve conflict with Stephen's patchset
> > > > >   (packet copy)
> > > > > * Add new apis to register a dynamic field/flag at a specific place
> > > > > * Add a dump function (sugg by David)
> > > > > * Enhance field registration function to select the best offset, keeping
> > > > >   large aligned zones as much as possible (sugg by Konstantin)
> > > > > * Use a size_t and unsigned int instead of int when relevant
> > > > >   (sugg by Konstantin)
> > > > > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> > > > >   (sugg by Konstantin)
> > > > > * Remove unused argument in private function (sugg by Konstantin)
> > > > > * Fix and simplify locking (sugg by Konstantin)
> > > > > * Fix minor typo
> > > > >
> > > > > rfc -> v1
> > > > >
> > > > > * Rebase on top of master
> > > > > * Change registration API to use a structure instead of
> > > > >   variables, getting rid of #defines (Stephen's comment)
> > > > > * Update flag registration to use a similar API as fields.
> > > > > * Change max name length from 32 to 64 (sugg. by Thomas)
> > > > > * Enhance API documentation (Haiyue's and Andrew's comments)
> > > > > * Add a debug log at registration
> > > > > * Add some words in release note
> > > > > * Did some performance tests (sugg. by Andrew):
> > > > >   On my platform, reading a dynamic field takes ~3 cycles more
> > > > >   than a static field, and ~2 cycles more for writing.
> > > > >
> > > > >  app/test/test_mbuf.c                   | 145 ++++++-
> > > > >  doc/guides/rel_notes/release_19_11.rst |   7 +
> > > > >  lib/librte_mbuf/Makefile               |   2 +
> > > > >  lib/librte_mbuf/meson.build            |   6 +-
> > > > >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> > > > >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> > > > >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> > > > >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > > > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > > > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > > > >
> > > > > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
> > > > > index b9c2b2500..01cafad59 100644
> > > > > --- a/app/test/test_mbuf.c
> > > > > +++ b/app/test/test_mbuf.c
> > > > > @@ -28,6 +28,7 @@
> > > > >  #include <rte_random.h>
> > > > >  #include <rte_cycles.h>
> > > > >  #include <rte_malloc.h>
> > > > > +#include <rte_mbuf_dyn.h>
> > > > >
> > >
> > > [snip]
> > > > > +int
> > > > > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > > > > +				unsigned int req)
> > > > > +{
> > > > > +	int ret;
> > > > > +
> > > > > +	if (req != UINT_MAX && req >= 64) {
> > > >
> > > > Might be better to replace 64 with something like sizeof(mbuf->ol_flags) * CHAR_BIT or so.
> > >
> > > Might introduce a new macro like kernel:
> > >
> > > /**
> > >  * FIELD_SIZEOF - get the size of a struct's field
> > >  * @t: the target struct
> > >  * @f: the target struct's field
> > >  * Return: the size of @f in the struct definition without having a
> > >  * declared instance of @t.
> > >  */
> > > #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
> > >
> > > Then: FIELD_SIZEOF(rte_mbuf, ol_flags) * CHAR_BIT
> >
> > Good idea, thanks
> >
> 
> Kernel is replacing FIELD_SIZEOF with sizeof_member

Yes, but it looks like that is targeted at 5.5? It hasn't been merged in 5.4. ;-)

https://www.phoronix.com/scan.php?page=news_item&px=Linux-5.4-Size-Of-Member

https://patchwork.kernel.org/patch/11184583/

+/**
+ * sizeof_member(TYPE, MEMBER) - get the size of a struct's member
+ *
+ * @TYPE: the target struct
+ * @MEMBER: the target struct's member
+ *
+ * Return: the size of @MEMBER in the struct definition without having a
+ * declared instance of @TYPE.
+ */
+#define sizeof_member(TYPE, MEMBER)	(sizeof(((TYPE *)0)->MEMBER))



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-23 13:33     ` Olivier Matz
@ 2019-10-24  4:54       ` Shahaf Shuler
  2019-10-24  7:07         ` Olivier Matz
  0 siblings, 1 reply; 64+ messages in thread
From: Shahaf Shuler @ 2019-10-24  4:54 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Wednesday, October 23, 2019 4:34 PM, Olivier Matz:
> Subject: Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> 
> Hi Shahaf,
> 
> On Wed, Oct 23, 2019 at 12:00:30PM +0000, Shahaf Shuler wrote:
> > Hi Olivier,
> >
> > Thursday, October 17, 2019 5:42 PM, Olivier Matz:
> > > Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and
> > > flags
> > >
> > > Many features require to store data inside the mbuf. As the room in
> > > mbuf structure is limited, it is not possible to have a field for
> > > each feature. Also, changing fields in the mbuf structure can break the
> API or ABI.
> > >
> > > This commit addresses these issues, by enabling the dynamic
> > > registration of fields or flags:
> > >
> > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > >   given size (>= 1 byte) and alignment constraint.
> > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > >
> > > The typical use case is a PMD that registers space for an offload
> > > feature, when the application requests to enable this feature.  As
> > > the space in mbuf is limited, the space should only be reserved if
> > > it is going to be used (i.e when the application explicitly asks for it).
> >
> > According to description, the dynamic field enables custom application and
> supported PMDs to use the dynamic part of the mbuf for their specific
> needs.
> > However the mechanism to report and activate the field/flag registration
> comes from the general OFFLOAD flags.
> >
> > Maybe it will be better to an option to query and select dynamic fields for
> PMD outside of the standard ethdev offload flags?
> 
> It is not mandatory to use the ethdev layer to register a dynamic field or flag
> in the mbuf. It is just the typical use case.
> 
> It can also be enabled when using a library that have specific needs, for
> instance, you call rte_reorder_init(), and it will register the sequence number
> dynamic field.
> 
> An application that requires a specific mbuf field can also do the registration
> by itself.
> 
> In other words, when you initialize a subpart that needs a dynamic field or
> flag, you have to do the registration there.
> 

I guess my question mainly targets one of the use cases for dynamic mbuf fields, which is vendor-specific offloads.
In such a case we would like to have dynamic fields/flags negotiated between the application and the PMD.

The question is whether we provide a unified way for the application to query PMD-specific dynamic fields, or we let each PMD vendor implement this handshake as they wish (devargs, PMD documentation, etc.).



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-24  4:54       ` Shahaf Shuler
@ 2019-10-24  7:07         ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-10-24  7:07 UTC (permalink / raw)
  To: Shahaf Shuler
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi,

On Thu, Oct 24, 2019 at 04:54:20AM +0000, Shahaf Shuler wrote:
> Wednesday, October 23, 2019 4:34 PM, Olivier Matz:
> > Subject: Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > Hi Shahaf,
> > 
> > On Wed, Oct 23, 2019 at 12:00:30PM +0000, Shahaf Shuler wrote:
> > > Hi Olivier,
> > >
> > > Thursday, October 17, 2019 5:42 PM, Olivier Matz:
> > > > Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and
> > > > flags
> > > >
> > > > Many features require to store data inside the mbuf. As the room in
> > > > mbuf structure is limited, it is not possible to have a field for
> > > > each feature. Also, changing fields in the mbuf structure can break the
> > API or ABI.
> > > >
> > > > This commit addresses these issues, by enabling the dynamic
> > > > registration of fields or flags:
> > > >
> > > > - a dynamic field is a named area in the rte_mbuf structure, with a
> > > >   given size (>= 1 byte) and alignment constraint.
> > > > - a dynamic flag is a named bit in the rte_mbuf structure.
> > > >
> > > > The typical use case is a PMD that registers space for an offload
> > > > feature, when the application requests to enable this feature.  As
> > > > the space in mbuf is limited, the space should only be reserved if
> > > > it is going to be used (i.e when the application explicitly asks for it).
> > >
> > > According to description, the dynamic field enables custom application and
> > supported PMDs to use the dynamic part of the mbuf for their specific
> > needs.
> > > However the mechanism to report and activate the field/flag registration
> > comes from the general OFFLOAD flags.
> > >
> > > Maybe it will be better to an option to query and select dynamic fields for
> > PMD outside of the standard ethdev offload flags?
> > 
> > It is not mandatory to use the ethdev layer to register a dynamic field or flag
> > in the mbuf. It is just the typical use case.
> > 
> > It can also be enabled when using a library that have specific needs, for
> > instance, you call rte_reorder_init(), and it will register the sequence number
> > dynamic field.
> > 
> > An application that requires a specific mbuf field can also do the registration
> > by itself.
> > 
> > In other words, when you initialize a subpart that needs a dynamic field or
> > flag, you have to do the registration there.
> > 
> 
> I guess my question mainly targets one of the use cases for dynamic mbuf fields which is vendor specific offloads.
> On such case we would like to have dynamic fields/flags negotiated between the application and PMD. 
> 
> The question is whether we provide a unified way for application to query PMD specific dynamic fields or we let PMD vendor to implement this handshake as they wish (devargs, through PMD doc, etc..)

I have no strong opinion. It can be a PMD-specific API (function or
devargs) to enable the feature.

The only important thing is to not register the field if it won't be
used.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
                     ` (2 preceding siblings ...)
  2019-10-23 12:00   ` Shahaf Shuler
@ 2019-10-24  7:38   ` Slava Ovsiienko
  2019-10-24  7:56     ` Olivier Matz
  3 siblings, 1 reply; 64+ messages in thread
From: Slava Ovsiienko @ 2019-10-24  7:38 UTC (permalink / raw)
  To: Olivier Matz, dev
  Cc: Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

Hi,

Doc building failed; it seems the rte_mbuf_dynfield_copy() description should be fixed (the documented @param names m_dst/m_src do not match the argument names mdst/msrc):

./lib/librte_mbuf/rte_mbuf.h:1694: warning: argument 'm_dst' of command @param is not found in the argument list of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
./lib/librte_mbuf/rte_mbuf.h:1694: warning: argument 'm_src' of command @param is not found in the argument list of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
./lib/librte_mbuf/rte_mbuf.h:1694: warning: The following parameters of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc) are not documented

With best regards,
Slava

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Olivier Matz
> Sent: Thursday, October 17, 2019 17:42
> To: dev@dpdk.org
> Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Bruce Richardson
> <bruce.richardson@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>;
> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> <keith.wiles@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; Morten Brørup
> <mb@smartsharesystems.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> 
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each feature. Also,
> changing fields in the mbuf structure can break the API or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration of
> fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload feature,
> when the application requests to enable this feature.  As the space in mbuf is
> limited, the space should only be reserved if it is going to be used (i.e when
> the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible to
> unregister fields or flags for now.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> 
> v2
> 
> * Rebase on top of master: solve conflict with Stephen's patchset
>   (packet copy)
> * Add new apis to register a dynamic field/flag at a specific place
> * Add a dump function (sugg by David)
> * Enhance field registration function to select the best offset, keeping
>   large aligned zones as much as possible (sugg by Konstantin)
> * Use a size_t and unsigned int instead of int when relevant
>   (sugg by Konstantin)
> * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
>   (sugg by Konstantin)
> * Remove unused argument in private function (sugg by Konstantin)
> * Fix and simplify locking (sugg by Konstantin)
> * Fix minor typo
> 
> rfc -> v1
> 
> * Rebase on top of master
> * Change registration API to use a structure instead of
>   variables, getting rid of #defines (Stephen's comment)
> * Update flag registration to use a similar API as fields.
> * Change max name length from 32 to 64 (sugg. by Thomas)
> * Enhance API documentation (Haiyue's and Andrew's comments)
> * Add a debug log at registration
> * Add some words in release note
> * Did some performance tests (sugg. by Andrew):
>   On my platform, reading a dynamic field takes ~3 cycles more
>   than a static field, and ~2 cycles more for writing.
> 
>  app/test/test_mbuf.c                   | 145 ++++++-
>  doc/guides/rel_notes/release_19_11.rst |   7 +
>  lib/librte_mbuf/Makefile               |   2 +
>  lib/librte_mbuf/meson.build            |   6 +-
>  lib/librte_mbuf/rte_mbuf.h             |  23 +-
>  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
>  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
>  8 files changed, 959 insertions(+), 5 deletions(-)  create mode 100644
> lib/librte_mbuf/rte_mbuf_dyn.c  create mode 100644
> lib/librte_mbuf/rte_mbuf_dyn.h
> 
> diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index
> b9c2b2500..01cafad59 100644
> --- a/app/test/test_mbuf.c
> +++ b/app/test/test_mbuf.c
> @@ -28,6 +28,7 @@
>  #include <rte_random.h>
>  #include <rte_cycles.h>
>  #include <rte_malloc.h>
> +#include <rte_mbuf_dyn.h>
> 
>  #include "test.h"
> 
> @@ -657,7 +658,6 @@ test_attach_from_different_pool(struct
> rte_mempool *pktmbuf_pool,
>  		rte_pktmbuf_free(clone2);
>  	return -1;
>  }
> -#undef GOTO_FAIL
> 
>  /*
>   * test allocation and free of mbufs
> @@ -1276,6 +1276,143 @@ test_tx_offload(void)
>  	return (v1 == v2) ? 0 : -EINVAL;
>  }
> 
> +static int
> +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) {
> +	const struct rte_mbuf_dynfield dynfield = {
> +		.name = "test-dynfield",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield2 = {
> +		.name = "test-dynfield2",
> +		.size = sizeof(uint16_t),
> +		.align = __alignof__(uint16_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield3 = {
> +		.name = "test-dynfield3",
> +		.size = sizeof(uint8_t),
> +		.align = __alignof__(uint8_t),
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_big = {
> +		.name = "test-dynfield-fail-big",
> +		.size = 256,
> +		.align = 1,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynfield dynfield_fail_align = {
> +		.name = "test-dynfield-fail-align",
> +		.size = 1,
> +		.align = 3,
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag = {
> +		.name = "test-dynflag",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag2 = {
> +		.name = "test-dynflag2",
> +		.flags = 0,
> +	};
> +	const struct rte_mbuf_dynflag dynflag3 = {
> +		.name = "test-dynflag3",
> +		.flags = 0,
> +	};
> +	struct rte_mbuf *m = NULL;
> +	int offset, offset2, offset3;
> +	int flag, flag2, flag3;
> +	int ret;
> +
> +	printf("Test mbuf dynamic fields and flags\n");
> +	rte_mbuf_dyn_dump(stdout);
> +
> +	offset = rte_mbuf_dynfield_register(&dynfield);
> +	if (offset == -1)
> +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> +			offset, strerror(errno));
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield);
> +	if (ret != offset)
> +		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	offset2 = rte_mbuf_dynfield_register(&dynfield2);
> +	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
> +		GOTO_FAIL("failed to register dynamic field 2, offset2=%d:
> %s",
> +			offset2, strerror(errno));
> +
> +	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
> +				offsetof(struct rte_mbuf, dynfield1[1]));
> +	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
> +		GOTO_FAIL("failed to register dynamic field 3, offset=%d:
> %s",
> +			offset3, strerror(errno));
> +
> +	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
> +		offset, offset2, offset3);
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (too big)");
> +
> +	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (bad
> alignment)");
> +
> +	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
> +				offsetof(struct rte_mbuf, ol_flags));
> +	if (ret != -1)
> +		GOTO_FAIL("dynamic field creation should fail (not avail)");
> +
> +	flag = rte_mbuf_dynflag_register(&dynflag);
> +	if (flag == -1)
> +		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
> +			flag, strerror(errno));
> +
> +	ret = rte_mbuf_dynflag_register(&dynflag);
> +	if (ret != flag)
> +		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
> +			ret, strerror(errno));
> +
> +	flag2 = rte_mbuf_dynflag_register(&dynflag2);
> +	if (flag2 == -1 || flag2 == flag)
> +		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
> +			flag2, strerror(errno));
> +
> +	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
> +						rte_bsf64(PKT_LAST_FREE));
> +	if (flag3 != rte_bsf64(PKT_LAST_FREE))
> +		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
> +			flag3, strerror(errno));
> +
> +	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
> +
> +	/* set, get dynamic field */
> +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> +	if (m == NULL)
> +		GOTO_FAIL("Cannot allocate mbuf");
> +
> +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> +		GOTO_FAIL("failed to read dynamic field");
> +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> +		GOTO_FAIL("failed to read dynamic field");
> +
> +	/* set a dynamic flag */
> +	m->ol_flags |= (1ULL << flag);
> +
> +	rte_mbuf_dyn_dump(stdout);
> +	rte_pktmbuf_free(m);
> +	return 0;
> +fail:
> +	rte_pktmbuf_free(m);
> +	return -1;
> +}
> +#undef GOTO_FAIL
> +
>  static int
>  test_mbuf(void)
>  {
> @@ -1295,6 +1432,12 @@ test_mbuf(void)
>  		goto err;
>  	}
> 
> +	/* test registration of dynamic fields and flags */
> +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> +		printf("mbuf dynflag test failed\n");
> +		goto err;
> +	}
> +
>  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
>  	 * room size */
>  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> diff --git a/doc/guides/rel_notes/release_19_11.rst
> b/doc/guides/rel_notes/release_19_11.rst
> index 85953b962..9e9c94554 100644
> --- a/doc/guides/rel_notes/release_19_11.rst
> +++ b/doc/guides/rel_notes/release_19_11.rst
> @@ -21,6 +21,13 @@ DPDK Release 19.11
> 
>        xdg-open build/doc/html/guides/rel_notes/release_19_11.html
> 
> +* **Add support of dynamic fields and flags in mbuf.**
> +
> +  This new feature adds the ability to dynamically register some room
> + for a field or a flag in the mbuf structure. This is typically used
> + for specific offload features, where adding a static field or flag  in
> + the mbuf is not justified.
> +
> 
>  New Features
>  ------------
> diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index
> c8f6d2689..5a9bcee73 100644
> --- a/lib/librte_mbuf/Makefile
> +++ b/lib/librte_mbuf/Makefile
> @@ -17,8 +17,10 @@ LIBABIVER := 5
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c
> rte_mbuf_pool_ops.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> 
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
> rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build index
> 6cc11ebb4..9137e8f26 100644
> --- a/lib/librte_mbuf/meson.build
> +++ b/lib/librte_mbuf/meson.build
> @@ -2,8 +2,10 @@
>  # Copyright(c) 2017 Intel Corporation
> 
>  version = 5
> -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c') -
> headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> +	'rte_mbuf_dyn.c')
> +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> +	'rte_mbuf_dyn.h')
>  deps += ['mempool']
> 
>  allow_experimental_apis = true
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> fb0849ac1..5740b1e93 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -198,9 +198,12 @@ extern "C" {
>  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
>  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> 
> -/* add new RX flags here */
> +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> 
> -/* add new TX flags here */
> +#define PKT_FIRST_FREE (1ULL << 23)
> +#define PKT_LAST_FREE (1ULL << 39)
> +
> +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> 
>  /**
>   * Indicate that the metadata field in the mbuf is in use.
> @@ -738,6 +741,7 @@ struct rte_mbuf {
>  	 */
>  	struct rte_mbuf_ext_shared_info *shinfo;
> 
> +	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
>  } __rte_cache_aligned;
> 
>  /**
> @@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m,
> void *buf_addr,
>   */
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> 
> +/**
> + * Copy dynamic fields from m_src to m_dst.
> + *
> + * @param m_dst
> + *   The destination mbuf.
> + * @param m_src
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf
> +*msrc) {
> +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst-
> >dynfield1)); }
> +
>  /* internal */
>  static inline void
>  __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf
> *msrc) @@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf
> *mdst, const struct rte_mbuf *msrc)
>  	mdst->hash = msrc->hash;
>  	mdst->packet_type = msrc->packet_type;
>  	mdst->timestamp = msrc->timestamp;
> +	rte_mbuf_dynfield_copy(mdst, msrc);
>  }
> 
>  /**
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c
> b/lib/librte_mbuf/rte_mbuf_dyn.c new file mode 100644 index
> 000000000..9ef235483
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -0,0 +1,548 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <limits.h>
> +
> +#include <rte_common.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_tailq.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_string_fns.h>
> +#include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
> +
> +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> +
> +struct mbuf_dynfield_elt {
> +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> +	struct rte_mbuf_dynfield params;
> +	size_t offset;
> +};
> +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> +	.name = "RTE_MBUF_DYNFIELD",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> +
> +struct mbuf_dynflag_elt {
> +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> +	struct rte_mbuf_dynflag params;
> +	unsigned int bitnum;
> +};
> +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> +	.name = "RTE_MBUF_DYNFLAG",
> +};
> +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> +
> +struct mbuf_dyn_shm {
> +	/**
> +	 * For each mbuf byte, free_space[i] != 0 if space is free.
> +	 * The value is the size of the biggest aligned element that
> +	 * can fit in the zone.
> +	 */
> +	uint8_t free_space[sizeof(struct rte_mbuf)];
> +	/** Bitfield of available flags. */
> +	uint64_t free_flags;
> +};
> +static struct mbuf_dyn_shm *shm;
> +
> +/* Set the value of free_space[] according to the size and alignment of
> + * the free areas. This helps to select the best place when reserving a
> + * dynamic field. Assume tailq is locked.
> + */
> +static void
> +process_score(void)
> +{
> +	size_t off, align, size, i;
> +
> +	/* first, erase previous info */
> +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> +		if (shm->free_space[i])
> +			shm->free_space[i] = 1;
> +	}
> +
> +	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
> +		/* get the size of the free zone */
> +		for (size = 0; shm->free_space[off + size]; size++)
> +			;
> +		if (size == 0)
> +			continue;
> +
> +		/* get the alignment of biggest object that can fit in
> +		 * the zone at this offset.
> +		 */
> +		for (align = 1;
> +		     (off % (align << 1)) == 0 && (align << 1) <= size;
> +		     align <<= 1)
> +			;
> +
> +		/* save it in free_space[] */
> +		for (i = off; i < off + size; i++)
> +			shm->free_space[i] = RTE_MAX(align, shm-
> >free_space[i]);
> +	}
> +}
> +
> +/* Allocate and initialize the shared memory. Assume tailq is locked */
> +static int
> +init_shared_mem(void)
> +{
> +	const struct rte_memzone *mz;
> +	uint64_t mask;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		mz =
> rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> +						sizeof(struct
> mbuf_dyn_shm),
> +						SOCKET_ID_ANY, 0,
> +						RTE_CACHE_LINE_SIZE);
> +	} else {
> +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> +	}
> +	if (mz == NULL)
> +		return -1;
> +
> +	shm = mz->addr;
> +
> +#define mark_free(field)						\
> +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> +		1, sizeof(((struct rte_mbuf *)0)->field))
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		/* init free_space, keep it sync'd with
> +		 * rte_mbuf_dynfield_copy().
> +		 */
> +		memset(shm, 0, sizeof(*shm));
> +		mark_free(dynfield1);
> +
> +		/* init free_flags */
> +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask
> <<= 1)
> +			shm->free_flags |= mask;
> +
> +		process_score();
> +	}
> +#undef mark_free
> +
> +	return 0;
> +}
> +
> +/* check if this offset can be used */
> +static int
> +check_offset(size_t offset, size_t size, size_t align) {
> +	size_t i;
> +
> +	if ((offset & (align - 1)) != 0)
> +		return -1;
> +	if (offset + size > sizeof(struct rte_mbuf))
> +		return -1;
> +
> +	for (i = 0; i < size; i++) {
> +		if (!shm->free_space[i + offset])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynfield_elt *
> +__mbuf_dynfield_lookup(const char *name) {
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynfield;
> +}
> +
> +int
> +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield
> +*params) {
> +	struct mbuf_dynfield_elt *mbuf_dynfield;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynfield == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynfield->params,
> sizeof(*params));
> +
> +	return mbuf_dynfield->offset;
> +}
> +
> +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> +		const struct rte_mbuf_dynfield *params2) {
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->size != params2->size)
> +		return -1;
> +	if (params1->align != params2->align)
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t req)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int best_zone = UINT_MAX;
> +	size_t i, offset;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> +	if (mbuf_dynfield != NULL) {
> +		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) <
> 0) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynfield->offset;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == SIZE_MAX) {
> +		for (offset = 0;
> +		     offset < sizeof(struct rte_mbuf);
> +		     offset++) {
> +			if (check_offset(offset, params->size,
> +						params->align) == 0 &&
> +					shm->free_space[offset] <
> best_zone) {
> +				best_zone = shm->free_space[offset];
> +				req = offset;
> +			}
> +		}
> +		if (req == SIZE_MAX) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +	} else {
> +		if (check_offset(req, params->size, params->align) < 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +	}
> +
> +	offset = req;
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield",
> sizeof(*mbuf_dynfield), 0);
> +	if (mbuf_dynfield == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> +		sizeof(mbuf_dynfield->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> +		rte_errno = ENAMETOOLONG;
> +		rte_free(mbuf_dynfield);
> +		rte_free(te);
> +		return -1;
> +	}
> +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield-
> >params));
> +	mbuf_dynfield->offset = offset;
> +	te->data = mbuf_dynfield;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> +
> +	for (i = offset; i < offset + params->size; i++)
> +		shm->free_space[i] = 0;
> +	process_score();
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu,
> al=%zu, fl=0x%x) -> %zd\n",
> +		params->name, params->size, params->align, params->flags,
> +		offset);
> +
> +	return offset;
> +}
> +
> +int
> +rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t req)
> +{
> +	int ret;
> +
> +	if (params->size >= sizeof(struct rte_mbuf)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (!rte_is_power_of_2(params->align)) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +	if (params->flags != 0) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynfield_register_offset(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +
> +int
> +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params) {
> +	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX); }
> +
> +/* assume tailq is locked */
> +static struct mbuf_dynflag_elt *
> +__mbuf_dynflag_lookup(const char *name) {
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +	struct rte_tailq_entry *te;
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		if (strncmp(name, mbuf_dynflag->params.name,
> +				RTE_MBUF_DYN_NAMESIZE) == 0)
> +			break;
> +	}
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return mbuf_dynflag;
> +}
> +
> +int
> +rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params)
> +{
> +	struct mbuf_dynflag_elt *mbuf_dynflag;
> +
> +	if (shm == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_read_lock();
> +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (mbuf_dynflag == NULL) {
> +		rte_errno = ENOENT;
> +		return -1;
> +	}
> +
> +	if (params != NULL)
> +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> +
> +	return mbuf_dynflag->bitnum;
> +}
> +
> +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> +		const struct rte_mbuf_dynflag *params2) {
> +	if (strcmp(params1->name, params2->name))
> +		return -1;
> +	if (params1->flags != params2->flags)
> +		return -1;
> +	return 0;
> +}
> +
> +/* assume tailq is locked */
> +static int
> +__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> *params,
> +				unsigned int req)
> +{
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	unsigned int bitnum;
> +	int ret;
> +
> +	if (shm == NULL && init_shared_mem() < 0)
> +		return -1;
> +
> +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> +	if (mbuf_dynflag != NULL) {
> +		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0)
> {
> +			rte_errno = EEXIST;
> +			return -1;
> +		}
> +		return mbuf_dynflag->bitnum;
> +	}
> +
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		rte_errno = EPERM;
> +		return -1;
> +	}
> +
> +	if (req == UINT_MAX) {
> +		if (shm->free_flags == 0) {
> +			rte_errno = ENOENT;
> +			return -1;
> +		}
> +		bitnum = rte_bsf64(shm->free_flags);
> +	} else {
> +		if ((shm->free_flags & (1ULL << req)) == 0) {
> +			rte_errno = EBUSY;
> +			return -1;
> +		}
> +		bitnum = req;
> +	}
> +
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +
> +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL)
> +		return -1;
> +
> +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag),
> 0);
> +	if (mbuf_dynflag == NULL) {
> +		rte_free(te);
> +		return -1;
> +	}
> +
> +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> +		sizeof(mbuf_dynflag->params.name));
> +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> +		rte_free(mbuf_dynflag);
> +		rte_free(te);
> +		rte_errno = ENAMETOOLONG;
> +		return -1;
> +	}
> +	mbuf_dynflag->bitnum = bitnum;
> +	te->data = mbuf_dynflag;
> +
> +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> +
> +	shm->free_flags &= ~(1ULL << bitnum);
> +
> +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) ->
> %u\n",
> +		params->name, params->flags, bitnum);
> +
> +	return bitnum;
> +}
> +
> +int
> +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> +				unsigned int req)
> +{
> +	int ret;
> +
> +	if (req != UINT_MAX && req >= 64) {
> +		rte_errno = EINVAL;
> +		return -1;
> +	}
> +
> +	rte_mcfg_tailq_write_lock();
> +	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ret;
> +}
> +
> +int
> +rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params) {
> +	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX); }
> +
> +void rte_mbuf_dyn_dump(FILE *out)
> +{
> +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> +	struct mbuf_dynfield_elt *dynfield;
> +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> +	struct mbuf_dynflag_elt *dynflag;
> +	struct rte_tailq_entry *te;
> +	size_t i;
> +
> +	rte_mcfg_tailq_write_lock();
> +	init_shared_mem();
> +	fprintf(out, "Reserved fields:\n");
> +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> +		dynfield = (struct mbuf_dynfield_elt *)te->data;
> +		fprintf(out, "  name=%s offset=%zd size=%zd align=%zd
> flags=%x\n",
> +			dynfield->params.name, dynfield->offset,
> +			dynfield->params.size, dynfield->params.align,
> +			dynfield->params.flags);
> +	}
> +	fprintf(out, "Reserved flags:\n");
> +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> +		dynflag = (struct mbuf_dynflag_elt *)te->data;
> +		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
> +			dynflag->params.name, dynflag->bitnum,
> +			dynflag->params.flags);
> +	}
> +	fprintf(out, "Free space in mbuf (0 = free, value = zone
> alignment):\n");
> +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> +		if ((i % 8) == 0)
> +			fprintf(out, "  %4.4zx: ", i);
> +		fprintf(out, "%2.2x%s", shm->free_space[i],
> +			(i % 8 != 7) ? " " : "\n");
> +	}
> +	rte_mcfg_tailq_write_unlock();
> +}
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h
> b/lib/librte_mbuf/rte_mbuf_dyn.h new file mode 100644 index
> 000000000..307613c96
> --- /dev/null
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> @@ -0,0 +1,226 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 6WIND S.A.
> + */
> +
> +#ifndef _RTE_MBUF_DYN_H_
> +#define _RTE_MBUF_DYN_H_
> +
> +/**
> + * @file
> + * RTE Mbuf dynamic fields and flags
> + *
> + * Many features require to store data inside the mbuf. As the room in
> + * mbuf structure is limited, it is not possible to have a field for
> + * each feature. Also, changing fields in the mbuf structure can break
> + * the API or ABI.
> + *
> + * This module addresses this issue, by enabling the dynamic
> + * registration of fields or flags:
> + *
> + * - a dynamic field is a named area in the rte_mbuf structure, with a
> + *   given size (>= 1 byte) and alignment constraint.
> + * - a dynamic flag is a named bit in the rte_mbuf structure, stored
> + *   in mbuf->ol_flags.
> + *
> + * The typical use case is when a specific offload feature requires to
> + * register a dedicated offload field in the mbuf structure, and adding
> + * a static field or flag is not justified.
> + *
> + * Example of use:
> + *
> + * - A rte_mbuf_dynfield structure is defined, containing the parameters
> + *   of the dynamic field to be registered:
> + *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
> + * - The application initializes the PMD, and asks for this feature
> + *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
> + *   rxconf. This will make the PMD register the field by calling
> + *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
> + *   stores the returned offset.
> + * - The application that uses the offload feature also registers
> + *   the field to retrieve the same offset.
> + * - When the PMD receives a packet, it can set the field:
> + *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
> + * - In the main loop, the application can retrieve the value with
> + *   the same macro.
> + *
> + * To avoid wasting space, the dynamic fields or flags must only be
> + * reserved on demand, when an application asks for the related feature.
> + *
> + * The registration can be done at any moment, but it is not possible
> + * to unregister fields or flags for now.
> + *
> + * A dynamic field can be reserved and used by an application only.
> + * It can for instance be a packet mark.
> + */
> +
> +#include <sys/types.h>
> +/**
> + * Maximum length of the dynamic field or flag string.
> + */
> +#define RTE_MBUF_DYN_NAMESIZE 64
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic field.
> + */
> +struct rte_mbuf_dynfield {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
> +	size_t size;        /**< The number of bytes to reserve. */
> +	size_t align;       /**< The alignment constraint (power of 2). */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> +
> +/**
> + * Structure describing the parameters of a mbuf dynamic flag.
> + */
> +struct rte_mbuf_dynflag {
> +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic
> flag. */
> +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure.
> + *
> + * If the field is already registered (same name and parameters), its
> + * offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters (name, size,
> + *   alignment constraint and flags).
> + * @return
> + *   The offset in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: not enough room in mbuf.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name does not end with \0.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Register space for a dynamic field in the mbuf structure at offset.
> + *
> + * If the field is already registered (same name, parameters and
> + * offset), the offset is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters (name, size,
> + *   alignment constraint and flags).
> + * @param offset
> + *   The requested offset. Ignored if SIZE_MAX is passed.
> + * @return
> + *   The offset in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, flags, or offset).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EBUSY: the requested offset cannot be used.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: not enough room in mbuf.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name does not end with \0.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> *params,
> +				size_t offset);
> +
> +/**
> + * Lookup for a registered dynamic mbuf field.
> + *
> + * @param name
> + *   A string identifying the dynamic field.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic field.
> + * @return
> + *   The offset of this field in the mbuf structure, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic field matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynfield_lookup(const char *name,
> +			struct rte_mbuf_dynfield *params);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure.
> + *
> + * If the flag is already registered (same name and parameters), its
> + * bitnum is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters of the dynamic
> + *   flag (name and options).
> + * @return
> + *   The number of the reserved bit, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: no more flag available.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Register a dynamic flag in the mbuf structure specifying bitnum.
> + *
> + * If the flag is already registered (same name, parameters and
> + * bitnum), the bitnum is returned.
> + *
> + * @param params
> + *   A structure containing the requested parameters of the dynamic
> + *   flag (name and options).
> + * @param bitnum
> + *   The requested bitnum. Ignored if UINT_MAX is passed.
> + * @return
> + *   The number of the reserved bit, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - EINVAL: invalid parameters (size, align, or flags).
> + *   - EEXIST: this name is already registered with different parameters.
> + *   - EBUSY: the requested bitnum cannot be used.
> + *   - EPERM: called from a secondary process.
> + *   - ENOENT: no more flag available.
> + *   - ENOMEM: allocation failure.
> + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> *params,
> +				unsigned int bitnum);
> +
> +/**
> + * Lookup for a registered dynamic mbuf flag.
> + *
> + * @param name
> + *   A string identifying the dynamic flag.
> + * @param params
> + *   If not NULL, and if the lookup is successful, the structure is
> + *   filled with the parameters of the dynamic flag.
> + * @return
> + *   The bit number of this flag, or -1 on error.
> + *   Possible values for rte_errno:
> + *   - ENOENT: no dynamic flag matches this name.
> + */
> +__rte_experimental
> +int rte_mbuf_dynflag_lookup(const char *name,
> +			struct rte_mbuf_dynflag *params);
> +
> +/**
> + * Helper macro to access a dynamic field.
> + */
> +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) +
> +(offset)))
> +
> +/**
> + * Dump the status of dynamic fields and flags.
> + *
> + * @param out
> + *   The stream where the status is displayed.
> + */
> +__rte_experimental
> +void rte_mbuf_dyn_dump(FILE *out);
> +
> +/* Placeholder for dynamic fields and flags declarations. */
> +
> +#endif
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map
> b/lib/librte_mbuf/rte_mbuf_version.map
> index 519fead35..9bf5ca37a 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -58,6 +58,13 @@ EXPERIMENTAL {
>  	global:
> 
>  	rte_mbuf_check;
> +	rte_mbuf_dynfield_lookup;
> +	rte_mbuf_dynfield_register;
> +	rte_mbuf_dynfield_register_offset;
> +	rte_mbuf_dynflag_lookup;
> +	rte_mbuf_dynflag_register;
> +	rte_mbuf_dynflag_register_bitnum;
> +	rte_mbuf_dyn_dump;
>  	rte_pktmbuf_copy;
> 
>  } DPDK_18.08;
> --
> 2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
  2019-10-24  7:38   ` Slava Ovsiienko
@ 2019-10-24  7:56     ` Olivier Matz
  0 siblings, 0 replies; 64+ messages in thread
From: Olivier Matz @ 2019-10-24  7:56 UTC (permalink / raw)
  To: Slava Ovsiienko
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Stephen Hemminger, Thomas Monjalon

On Thu, Oct 24, 2019 at 07:38:15AM +0000, Slava Ovsiienko wrote:
> Hi,
> 
> Doc building failed, it seems the rte_mbuf_dynfield_copy() description should be fixed:
> 
> ./lib/librte_mbuf/rte_mbuf.h:1694: warning: argument 'm_dst' of command @param is not found in the argument list of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> ./lib/librte_mbuf/rte_mbuf.h:1694: warning: argument 'm_src' of command @param is not found in the argument list of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> ./lib/librte_mbuf/rte_mbuf.h:1694: warning: The following parameters of rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc) are not documented

Thanks for spotting this, I'll add the fix to v3.

> 
> With best regards,
> Slava
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Olivier Matz
> > Sent: Thursday, October 17, 2019 17:42
> > To: dev@dpdk.org
> > Cc: Andrew Rybchenko <arybchenko@solarflare.com>; Bruce Richardson
> > <bruce.richardson@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>;
> > Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Wiles, Keith
> > <keith.wiles@intel.com>; Ananyev, Konstantin
> > <konstantin.ananyev@intel.com>; Morten Brørup
> > <mb@smartsharesystems.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Thomas Monjalon
> > <thomas@monjalon.net>
> > Subject: [dpdk-dev] [PATCH v2] mbuf: support dynamic fields and flags
> > 
> > Many features require to store data inside the mbuf. As the room in mbuf
> > structure is limited, it is not possible to have a field for each feature. Also,
> > changing fields in the mbuf structure can break the API or ABI.
> > 
> > This commit addresses these issues, by enabling the dynamic registration of
> > fields or flags:
> > 
> > - a dynamic field is a named area in the rte_mbuf structure, with a
> >   given size (>= 1 byte) and alignment constraint.
> > - a dynamic flag is a named bit in the rte_mbuf structure.
> > 
> > The typical use case is a PMD that registers space for an offload feature,
> > when the application requests to enable this feature.  As the space in mbuf is
> > limited, the space should only be reserved if it is going to be used (i.e when
> > the application explicitly asks for it).
> > 
> > The registration can be done at any moment, but it is not possible to
> > unregister fields or flags for now.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Acked-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > 
> > v2
> > 
> > * Rebase on top of master: solve conflict with Stephen's patchset
> >   (packet copy)
> > * Add new apis to register a dynamic field/flag at a specific place
> > * Add a dump function (sugg by David)
> > * Enhance field registration function to select the best offset, keeping
> >   large aligned zones as much as possible (sugg by Konstantin)
> > * Use a size_t and unsigned int instead of int when relevant
> >   (sugg by Konstantin)
> > * Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
> >   (sugg by Konstantin)
> > * Remove unused argument in private function (sugg by Konstantin)
> > * Fix and simplify locking (sugg by Konstantin)
> > * Fix minor typo
> > 
> > rfc -> v1
> > 
> > * Rebase on top of master
> > * Change registration API to use a structure instead of
> >   variables, getting rid of #defines (Stephen's comment)
> > * Update flag registration to use a similar API as fields.
> > * Change max name length from 32 to 64 (sugg. by Thomas)
> > * Enhance API documentation (Haiyue's and Andrew's comments)
> > * Add a debug log at registration
> > * Add some words in release note
> > * Did some performance tests (sugg. by Andrew):
> >   On my platform, reading a dynamic field takes ~3 cycles more
> >   than a static field, and ~2 cycles more for writing.
> > 
> >  app/test/test_mbuf.c                   | 145 ++++++-
> >  doc/guides/rel_notes/release_19_11.rst |   7 +
> >  lib/librte_mbuf/Makefile               |   2 +
> >  lib/librte_mbuf/meson.build            |   6 +-
> >  lib/librte_mbuf/rte_mbuf.h             |  23 +-
> >  lib/librte_mbuf/rte_mbuf_dyn.c         | 548 +++++++++++++++++++++++++
> >  lib/librte_mbuf/rte_mbuf_dyn.h         | 226 ++++++++++
> >  lib/librte_mbuf/rte_mbuf_version.map   |   7 +
> > >  8 files changed, 959 insertions(+), 5 deletions(-)
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
> > >  create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h
> > 
> > diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index
> > b9c2b2500..01cafad59 100644
> > --- a/app/test/test_mbuf.c
> > +++ b/app/test/test_mbuf.c
> > @@ -28,6 +28,7 @@
> >  #include <rte_random.h>
> >  #include <rte_cycles.h>
> >  #include <rte_malloc.h>
> > +#include <rte_mbuf_dyn.h>
> > 
> >  #include "test.h"
> > 
> > @@ -657,7 +658,6 @@ test_attach_from_different_pool(struct
> > rte_mempool *pktmbuf_pool,
> >  		rte_pktmbuf_free(clone2);
> >  	return -1;
> >  }
> > -#undef GOTO_FAIL
> > 
> >  /*
> >   * test allocation and free of mbufs
> > @@ -1276,6 +1276,143 @@ test_tx_offload(void)
> >  	return (v1 == v2) ? 0 : -EINVAL;
> >  }
> > 
> > +static int
> > +test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) {
> > +	const struct rte_mbuf_dynfield dynfield = {
> > +		.name = "test-dynfield",
> > +		.size = sizeof(uint8_t),
> > +		.align = __alignof__(uint8_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield2 = {
> > +		.name = "test-dynfield2",
> > +		.size = sizeof(uint16_t),
> > +		.align = __alignof__(uint16_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield3 = {
> > +		.name = "test-dynfield3",
> > +		.size = sizeof(uint8_t),
> > +		.align = __alignof__(uint8_t),
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield_fail_big = {
> > +		.name = "test-dynfield-fail-big",
> > +		.size = 256,
> > +		.align = 1,
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynfield dynfield_fail_align = {
> > +		.name = "test-dynfield-fail-align",
> > +		.size = 1,
> > +		.align = 3,
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag = {
> > +		.name = "test-dynflag",
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag2 = {
> > +		.name = "test-dynflag2",
> > +		.flags = 0,
> > +	};
> > +	const struct rte_mbuf_dynflag dynflag3 = {
> > +		.name = "test-dynflag3",
> > +		.flags = 0,
> > +	};
> > +	struct rte_mbuf *m = NULL;
> > +	int offset, offset2, offset3;
> > +	int flag, flag2, flag3;
> > +	int ret;
> > +
> > +	printf("Test mbuf dynamic fields and flags\n");
> > +	rte_mbuf_dyn_dump(stdout);
> > +
> > +	offset = rte_mbuf_dynfield_register(&dynfield);
> > +	if (offset == -1)
> > +		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
> > +			offset, strerror(errno));
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield);
> > +	if (ret != offset)
> > +		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
> > +			ret, strerror(errno));
> > +
> > +	offset2 = rte_mbuf_dynfield_register(&dynfield2);
> > +	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
> > +		GOTO_FAIL("failed to register dynamic field 2, offset2=%d:
> > %s",
> > +			offset2, strerror(errno));
> > +
> > +	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
> > +				offsetof(struct rte_mbuf, dynfield1[1]));
> > +	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
> > +		GOTO_FAIL("failed to register dynamic field 3, offset=%d:
> > %s",
> > +			offset3, strerror(errno));
> > +
> > +	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
> > +		offset, offset2, offset3);
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (too big)");
> > +
> > +	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (bad
> > alignment)");
> > +
> > +	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
> > +				offsetof(struct rte_mbuf, ol_flags));
> > +	if (ret != -1)
> > +		GOTO_FAIL("dynamic field creation should fail (not avail)");
> > +
> > +	flag = rte_mbuf_dynflag_register(&dynflag);
> > +	if (flag == -1)
> > +		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
> > +			flag, strerror(errno));
> > +
> > +	ret = rte_mbuf_dynflag_register(&dynflag);
> > +	if (ret != flag)
> > +		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
> > +			ret, strerror(errno));
> > +
> > +	flag2 = rte_mbuf_dynflag_register(&dynflag2);
> > +	if (flag2 == -1 || flag2 == flag)
> > +		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
> > +			flag2, strerror(errno));
> > +
> > +	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
> > +						rte_bsf64(PKT_LAST_FREE));
> > +	if (flag3 != rte_bsf64(PKT_LAST_FREE))
> > +		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
> > +			flag3, strerror(errno));
> > +
> > +	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
> > +
> > +	/* set, get dynamic field */
> > +	m = rte_pktmbuf_alloc(pktmbuf_pool);
> > +	if (m == NULL)
> > +		GOTO_FAIL("Cannot allocate mbuf");
> > +
> > +	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
> > +	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
> > +		GOTO_FAIL("failed to read dynamic field");
> > +	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
> > +	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
> > +		GOTO_FAIL("failed to read dynamic field");
> > +
> > +	/* set a dynamic flag */
> > +	m->ol_flags |= (1ULL << flag);
> > +
> > +	rte_mbuf_dyn_dump(stdout);
> > +	rte_pktmbuf_free(m);
> > +	return 0;
> > +fail:
> > +	rte_pktmbuf_free(m);
> > +	return -1;
> > +}
> > +#undef GOTO_FAIL
> > +
> >  static int
> >  test_mbuf(void)
> >  {
> > @@ -1295,6 +1432,12 @@ test_mbuf(void)
> >  		goto err;
> >  	}
> > 
> > +	/* test registration of dynamic fields and flags */
> > +	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
> > +		printf("mbuf dynflag test failed\n");
> > +		goto err;
> > +	}
> > +
> >  	/* create a specific pktmbuf pool with a priv_size != 0 and no data
> >  	 * room size */
> >  	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
> > diff --git a/doc/guides/rel_notes/release_19_11.rst
> > b/doc/guides/rel_notes/release_19_11.rst
> > index 85953b962..9e9c94554 100644
> > --- a/doc/guides/rel_notes/release_19_11.rst
> > +++ b/doc/guides/rel_notes/release_19_11.rst
> > @@ -21,6 +21,13 @@ DPDK Release 19.11
> > 
> >        xdg-open build/doc/html/guides/rel_notes/release_19_11.html
> > 
> > +* **Add support of dynamic fields and flags in mbuf.**
> > +
> > +  This new feature adds the ability to dynamically register some room
> > +  for a field or a flag in the mbuf structure. This is typically used
> > +  for specific offload features, where adding a static field or flag in
> > +  the mbuf is not justified.
> > +
> > 
> >  New Features
> >  ------------
> > diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index
> > c8f6d2689..5a9bcee73 100644
> > --- a/lib/librte_mbuf/Makefile
> > +++ b/lib/librte_mbuf/Makefile
> > @@ -17,8 +17,10 @@ LIBABIVER := 5
> > 
> >  # all source are stored in SRCS-y
> >  SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c
> > rte_mbuf_pool_ops.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
> > 
> >  # install includes
> >  SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
> > rte_mbuf_ptype.h rte_mbuf_pool_ops.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
> > 
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build index
> > 6cc11ebb4..9137e8f26 100644
> > --- a/lib/librte_mbuf/meson.build
> > +++ b/lib/librte_mbuf/meson.build
> > @@ -2,8 +2,10 @@
> >  # Copyright(c) 2017 Intel Corporation
> > 
> >  version = 5
> > -sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c') -
> > headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
> > +sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
> > +	'rte_mbuf_dyn.c')
> > +headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
> > +	'rte_mbuf_dyn.h')
> >  deps += ['mempool']
> > 
> >  allow_experimental_apis = true
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> > fb0849ac1..5740b1e93 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -198,9 +198,12 @@ extern "C" {
> >  #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
> >  #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
> > 
> > -/* add new RX flags here */
> > +/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
> > 
> > -/* add new TX flags here */
> > +#define PKT_FIRST_FREE (1ULL << 23)
> > +#define PKT_LAST_FREE (1ULL << 39)
> > +
> > +/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
> > 
> >  /**
> >   * Indicate that the metadata field in the mbuf is in use.
> > @@ -738,6 +741,7 @@ struct rte_mbuf {
> >  	 */
> >  	struct rte_mbuf_ext_shared_info *shinfo;
> > 
> > +	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
> >  } __rte_cache_aligned;
> > 
> >  /**
> > @@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m,
> > void *buf_addr,
> >   */
> >  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
> > 
> > +/**
> > + * Copy dynamic fields from m_src to m_dst.
> > + *
> > + * @param m_dst
> > + *   The destination mbuf.
> > + * @param m_src
> > + *   The source mbuf.
> > + */
> > +static inline void
> > +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf
> > +*msrc) {
> > +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst-
> > >dynfield1)); }
> > +
> >  /* internal */
> >  static inline void
> >  __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf
> > *msrc) @@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf
> > *mdst, const struct rte_mbuf *msrc)
> >  	mdst->hash = msrc->hash;
> >  	mdst->packet_type = msrc->packet_type;
> >  	mdst->timestamp = msrc->timestamp;
> > +	rte_mbuf_dynfield_copy(mdst, msrc);
> >  }
> > 
> >  /**
> > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c
> > b/lib/librte_mbuf/rte_mbuf_dyn.c new file mode 100644 index
> > 000000000..9ef235483
> > --- /dev/null
> > +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> > @@ -0,0 +1,548 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + */
> > +
> > +#include <sys/queue.h>
> > +#include <stdint.h>
> > +#include <limits.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_eal.h>
> > +#include <rte_eal_memconfig.h>
> > +#include <rte_tailq.h>
> > +#include <rte_errno.h>
> > +#include <rte_malloc.h>
> > +#include <rte_string_fns.h>
> > +#include <rte_mbuf.h>
> > +#include <rte_mbuf_dyn.h>
> > +
> > +#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
> > +
> > +struct mbuf_dynfield_elt {
> > +	TAILQ_ENTRY(mbuf_dynfield_elt) next;
> > +	struct rte_mbuf_dynfield params;
> > +	size_t offset;
> > +};
> > +TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynfield_tailq = {
> > +	.name = "RTE_MBUF_DYNFIELD",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
> > +
> > +struct mbuf_dynflag_elt {
> > +	TAILQ_ENTRY(mbuf_dynflag_elt) next;
> > +	struct rte_mbuf_dynflag params;
> > +	unsigned int bitnum;
> > +};
> > +TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
> > +
> > +static struct rte_tailq_elem mbuf_dynflag_tailq = {
> > +	.name = "RTE_MBUF_DYNFLAG",
> > +};
> > +EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
> > +
> > +struct mbuf_dyn_shm {
> > +	/**
> > +	 * For each mbuf byte, free_space[i] != 0 if space is free.
> > +	 * The value is the size of the biggest aligned element that
> > +	 * can fit in the zone.
> > +	 */
> > +	uint8_t free_space[sizeof(struct rte_mbuf)];
> > +	/** Bitfield of available flags. */
> > +	uint64_t free_flags;
> > +};
> > +static struct mbuf_dyn_shm *shm;
> > +
> > +/* Set the value of free_space[] according to the size and alignment of
> > + * the free areas. This helps to select the best place when reserving a
> > + * dynamic field. Assume tailq is locked.
> > + */
> > +static void
> > +process_score(void)
> > +{
> > +	size_t off, align, size, i;
> > +
> > +	/* first, erase previous info */
> > +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> > +		if (shm->free_space[i])
> > +			shm->free_space[i] = 1;
> > +	}
> > +
> > +	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
> > +		/* get the size of the free zone */
> > +		for (size = 0; shm->free_space[off + size]; size++)
> > +			;
> > +		if (size == 0)
> > +			continue;
> > +
> > +		/* get the alignment of biggest object that can fit in
> > +		 * the zone at this offset.
> > +		 */
> > +		for (align = 1;
> > +		     (off % (align << 1)) == 0 && (align << 1) <= size;
> > +		     align <<= 1)
> > +			;
> > +
> > +		/* save it in free_space[] */
> > +		for (i = off; i < off + size; i++)
> > +			shm->free_space[i] = RTE_MAX(align, shm-
> > >free_space[i]);
> > +	}
> > +}
> > +
> > +/* Allocate and initialize the shared memory. Assume tailq is locked */
> > +static int
> > +init_shared_mem(void)
> > +{
> > +	const struct rte_memzone *mz;
> > +	uint64_t mask;
> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		mz =
> > rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
> > +						sizeof(struct
> > mbuf_dyn_shm),
> > +						SOCKET_ID_ANY, 0,
> > +						RTE_CACHE_LINE_SIZE);
> > +	} else {
> > +		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
> > +	}
> > +	if (mz == NULL)
> > +		return -1;
> > +
> > +	shm = mz->addr;
> > +
> > +#define mark_free(field)						\
> > +	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
> > +		1, sizeof(((struct rte_mbuf *)0)->field))
> > +
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +		/* init free_space, keep it sync'd with
> > +		 * rte_mbuf_dynfield_copy().
> > +		 */
> > +		memset(shm, 0, sizeof(*shm));
> > +		mark_free(dynfield1);
> > +
> > +		/* init free_flags */
> > +		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask
> > <<= 1)
> > +			shm->free_flags |= mask;
> > +
> > +		process_score();
> > +	}
> > +#undef mark_free
> > +
> > +	return 0;
> > +}
> > +
> > +/* check if this offset can be used */
> > +static int
> > +check_offset(size_t offset, size_t size, size_t align) {
> > +	size_t i;
> > +
> > +	if ((offset & (align - 1)) != 0)
> > +		return -1;
> > +	if (offset + size > sizeof(struct rte_mbuf))
> > +		return -1;
> > +
> > +	for (i = 0; i < size; i++) {
> > +		if (!shm->free_space[i + offset])
> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynfield_elt *
> > +__mbuf_dynfield_lookup(const char *name) {
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > +		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
> > +		if (strcmp(name, mbuf_dynfield->params.name) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynfield;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield
> > +*params) {
> > +	struct mbuf_dynfield_elt *mbuf_dynfield;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynfield = __mbuf_dynfield_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynfield == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynfield->params,
> > sizeof(*params));
> > +
> > +	return mbuf_dynfield->offset;
> > +}
> > +
> > +static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
> > +		const struct rte_mbuf_dynfield *params2) {
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->size != params2->size)
> > +		return -1;
> > +	if (params1->align != params2->align)
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static int
> > +__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> > *params,
> > +				size_t req)
> > +{
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	unsigned int best_zone = UINT_MAX;
> > +	size_t i, offset;
> > +	int ret;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		return -1;
> > +
> > +	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
> > +	if (mbuf_dynfield != NULL) {
> > +		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) <
> > 0) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		return mbuf_dynfield->offset;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		return -1;
> > +	}
> > +
> > +	if (req == SIZE_MAX) {
> > +		for (offset = 0;
> > +		     offset < sizeof(struct rte_mbuf);
> > +		     offset++) {
> > +			if (check_offset(offset, params->size,
> > +						params->align) == 0 &&
> > +					shm->free_space[offset] <
> > best_zone) {
> > +				best_zone = shm->free_space[offset];
> > +				req = offset;
> > +			}
> > +		}
> > +		if (req == SIZE_MAX) {
> > +			rte_errno = ENOENT;
> > +			return -1;
> > +		}
> > +	} else {
> > +		if (check_offset(req, params->size, params->align) < 0) {
> > +			rte_errno = EBUSY;
> > +			return -1;
> > +		}
> > +	}
> > +
> > +	offset = req;
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		return -1;
> > +
> > +	mbuf_dynfield = rte_zmalloc("mbuf_dynfield",
> > sizeof(*mbuf_dynfield), 0);
> > +	if (mbuf_dynfield == NULL) {
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +
> > +	ret = strlcpy(mbuf_dynfield->params.name, params->name,
> > +		sizeof(mbuf_dynfield->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
> > +		rte_errno = ENAMETOOLONG;
> > +		rte_free(mbuf_dynfield);
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield-
> > >params));
> > +	mbuf_dynfield->offset = offset;
> > +	te->data = mbuf_dynfield;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
> > +
> > +	for (i = offset; i < offset + params->size; i++)
> > +		shm->free_space[i] = 0;
> > +	process_score();
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu,
> > al=%zu, fl=0x%x) -> %zd\n",
> > +		params->name, params->size, params->align, params->flags,
> > +		offset);
> > +
> > +	return offset;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> > *params,
> > +				size_t req)
> > +{
> > +	int ret;
> > +
> > +	if (params->size >= sizeof(struct rte_mbuf)) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +	if (!rte_is_power_of_2(params->align)) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +	if (params->flags != 0) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +	ret = __rte_mbuf_dynfield_register_offset(params, req);
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return ret;
> > +}
> > +
> > +int
> > +rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params) {
> > +	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX); }
> > +
> > +/* assume tailq is locked */
> > +static struct mbuf_dynflag_elt *
> > +__mbuf_dynflag_lookup(const char *name) {
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +	struct rte_tailq_entry *te;
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> > +		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
> > +		if (strncmp(name, mbuf_dynflag->params.name,
> > +				RTE_MBUF_DYN_NAMESIZE) == 0)
> > +			break;
> > +	}
> > +
> > +	if (te == NULL) {
> > +		rte_errno = ENOENT;
> > +		return NULL;
> > +	}
> > +
> > +	return mbuf_dynflag;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_lookup(const char *name,
> > +			struct rte_mbuf_dynflag *params)
> > +{
> > +	struct mbuf_dynflag_elt *mbuf_dynflag;
> > +
> > +	if (shm == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_read_lock();
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(name);
> > +	rte_mcfg_tailq_read_unlock();
> > +
> > +	if (mbuf_dynflag == NULL) {
> > +		rte_errno = ENOENT;
> > +		return -1;
> > +	}
> > +
> > +	if (params != NULL)
> > +		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
> > +
> > +	return mbuf_dynflag->bitnum;
> > +}
> > +
> > +static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
> > +		const struct rte_mbuf_dynflag *params2) {
> > +	if (strcmp(params1->name, params2->name))
> > +		return -1;
> > +	if (params1->flags != params2->flags)
> > +		return -1;
> > +	return 0;
> > +}
> > +
> > +/* assume tailq is locked */
> > +static int
> > +__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> > *params,
> > +				unsigned int req)
> > +{
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
> > +	struct rte_tailq_entry *te = NULL;
> > +	unsigned int bitnum;
> > +	int ret;
> > +
> > +	if (shm == NULL && init_shared_mem() < 0)
> > +		return -1;
> > +
> > +	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
> > +	if (mbuf_dynflag != NULL) {
> > +		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0)
> > {
> > +			rte_errno = EEXIST;
> > +			return -1;
> > +		}
> > +		return mbuf_dynflag->bitnum;
> > +	}
> > +
> > +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> > +		rte_errno = EPERM;
> > +		return -1;
> > +	}
> > +
> > +	if (req == UINT_MAX) {
> > +		if (shm->free_flags == 0) {
> > +			rte_errno = ENOENT;
> > +			return -1;
> > +		}
> > +		bitnum = rte_bsf64(shm->free_flags);
> > +	} else {
> > +		if ((shm->free_flags & (1ULL << req)) == 0) {
> > +			rte_errno = EBUSY;
> > +			return -1;
> > +		}
> > +		bitnum = req;
> > +	}
> > +
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +
> > +	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
> > +	if (te == NULL)
> > +		return -1;
> > +
> > +	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag),
> > 0);
> > +	if (mbuf_dynflag == NULL) {
> > +		rte_free(te);
> > +		return -1;
> > +	}
> > +
> > +	ret = strlcpy(mbuf_dynflag->params.name, params->name,
> > +		sizeof(mbuf_dynflag->params.name));
> > +	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
> > +		rte_free(mbuf_dynflag);
> > +		rte_free(te);
> > +		rte_errno = ENAMETOOLONG;
> > +		return -1;
> > +	}
> > +	mbuf_dynflag->bitnum = bitnum;
> > +	te->data = mbuf_dynflag;
> > +
> > +	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
> > +
> > +	shm->free_flags &= ~(1ULL << bitnum);
> > +
> > +	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) ->
> > %u\n",
> > +		params->name, params->flags, bitnum);
> > +
> > +	return bitnum;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
> > +				unsigned int req)
> > +{
> > +	int ret;
> > +
> > +	if (req != UINT_MAX && req >= 64) {
> > +		rte_errno = EINVAL;
> > +		return -1;
> > +	}
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
> > +	rte_mcfg_tailq_write_unlock();
> > +
> > +	return ret;
> > +}
> > +
> > +int
> > +rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params) {
> > +	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX); }
> > +
> > +void rte_mbuf_dyn_dump(FILE *out)
> > +{
> > +	struct mbuf_dynfield_list *mbuf_dynfield_list;
> > +	struct mbuf_dynfield_elt *dynfield;
> > +	struct mbuf_dynflag_list *mbuf_dynflag_list;
> > +	struct mbuf_dynflag_elt *dynflag;
> > +	struct rte_tailq_entry *te;
> > +	size_t i;
> > +
> > +	rte_mcfg_tailq_write_lock();
> > +	init_shared_mem();
> > +	fprintf(out, "Reserved fields:\n");
> > +	mbuf_dynfield_list = RTE_TAILQ_CAST(
> > +		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
> > +	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
> > +		dynfield = (struct mbuf_dynfield_elt *)te->data;
> > +		fprintf(out, "  name=%s offset=%zd size=%zd align=%zd
> > flags=%x\n",
> > +			dynfield->params.name, dynfield->offset,
> > +			dynfield->params.size, dynfield->params.align,
> > +			dynfield->params.flags);
> > +	}
> > +	fprintf(out, "Reserved flags:\n");
> > +	mbuf_dynflag_list = RTE_TAILQ_CAST(
> > +		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
> > +	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
> > +		dynflag = (struct mbuf_dynflag_elt *)te->data;
> > +		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
> > +			dynflag->params.name, dynflag->bitnum,
> > +			dynflag->params.flags);
> > +	}
> > +	fprintf(out, "Free space in mbuf (0 = free, value = zone
> > alignment):\n");
> > +	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
> > +		if ((i % 8) == 0)
> > +			fprintf(out, "  %4.4zx: ", i);
> > +		fprintf(out, "%2.2x%s", shm->free_space[i],
> > +			(i % 8 != 7) ? " " : "\n");
> > +	}
> > +	rte_mcfg_tailq_write_unlock();
> > +}
> > diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h
> > b/lib/librte_mbuf/rte_mbuf_dyn.h new file mode 100644 index
> > 000000000..307613c96
> > --- /dev/null
> > +++ b/lib/librte_mbuf/rte_mbuf_dyn.h
> > @@ -0,0 +1,226 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + */
> > +
> > +#ifndef _RTE_MBUF_DYN_H_
> > +#define _RTE_MBUF_DYN_H_
> > +
> > +/**
> > + * @file
> > + * RTE Mbuf dynamic fields and flags
> > + *
> > + * Many features require to store data inside the mbuf. As the room in
> > + * mbuf structure is limited, it is not possible to have a field for
> > + * each feature. Also, changing fields in the mbuf structure can break
> > + * the API or ABI.
> > + *
> > + * This module addresses this issue, by enabling the dynamic
> > + * registration of fields or flags:
> > + *
> > + * - a dynamic field is a named area in the rte_mbuf structure, with a
> > + *   given size (>= 1 byte) and alignment constraint.
> > + * - a dynamic flag is a named bit in the rte_mbuf structure, stored
> > + *   in mbuf->ol_flags.
> > + *
> > + * The typical use case is when a specific offload feature requires to
> > + * register a dedicated offload field in the mbuf structure, and adding
> > + * a static field or flag is not justified.
> > + *
> > + * Example of use:
> > + *
> > + * - A rte_mbuf_dynfield structure is defined, containing the parameters
> > + *   of the dynamic field to be registered:
> > + *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
> > + * - The application initializes the PMD, and asks for this feature
> > + *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
> > + *   rxconf. This will make the PMD register the field by calling
> > + *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
> > + *   stores the returned offset.
> > + * - The application that uses the offload feature also registers
> > + *   the field to retrieve the same offset.
> > + * - When the PMD receives a packet, it can set the field:
> > + *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
> > + * - In the main loop, the application can retrieve the value with
> > + *   the same macro.
> > + *
> > + * To avoid wasting space, the dynamic fields or flags must only be
> > + * reserved on demand, when an application asks for the related feature.
> > + *
> > + * The registration can be done at any moment, but it is not possible
> > + * to unregister fields or flags for now.
> > + *
> > + * A dynamic field can be reserved and used by an application only.
> > + * It can for instance be a packet mark.
> > + */
> > +
> > +#include <sys/types.h>
> > +/**
> > + * Maximum length of the dynamic field or flag string.
> > + */
> > +#define RTE_MBUF_DYN_NAMESIZE 64
> > +
> > +/**
> > + * Structure describing the parameters of a mbuf dynamic field.
> > + */
> > +struct rte_mbuf_dynfield {
> > +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
> > +	size_t size;        /**< The number of bytes to reserve. */
> > +	size_t align;       /**< The alignment constraint (power of 2). */
> > +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> > +
> > +/**
> > + * Structure describing the parameters of a mbuf dynamic flag.
> > + */
> > +struct rte_mbuf_dynflag {
> > +	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic
> > flag. */
> > +	unsigned int flags; /**< Reserved for future use, must be 0. */ };
> > +
> > +/**
> > + * Register space for a dynamic field in the mbuf structure.
> > + *
> > + * If the field is already registered (same name and parameters), its
> > + * offset is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters (name, size,
> > + *   alignment constraint and flags).
> > + * @return
> > + *   The offset in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (size, align, or flags).
> > + *   - EEXIST: this name is already registered with different parameters.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: not enough room in mbuf.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name does not end with \0.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
> > +
> > +/**
> > + * Register space for a dynamic field in the mbuf structure at offset.
> > + *
> > + * If the field is already registered (same name, parameters and
> > +offset),
> > + * the offset is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters (name, size,
> > + *   alignment constraint and flags).
> > + * @param offset
> > + *   The requested offset. Ignored if SIZE_MAX is passed.
> > + * @return
> > + *   The offset in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (size, align, flags, or offset).
> > + *   - EEXIST: this name is already registered with different parameters.
> > + *   - EBUSY: the requested offset cannot be used.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: not enough room in mbuf.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name does not end with \0.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield
> > *params,
> > +				size_t offset);
> > +
> > +/**
> > + * Lookup for a registered dynamic mbuf field.
> > + *
> > + * @param name
> > + *   A string identifying the dynamic field.
> > + * @param params
> > + *   If not NULL, and if the lookup is successful, the structure is
> > + *   filled with the parameters of the dynamic field.
> > + * @return
> > + *   The offset of this field in the mbuf structure, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - ENOENT: no dynamic field matches this name.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynfield_lookup(const char *name,
> > +			struct rte_mbuf_dynfield *params);
> > +
> > +/**
> > + * Register a dynamic flag in the mbuf structure.
> > + *
> > + * If the flag is already registered (same name and parameters), its
> > + * bitnum is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters of the dynamic
> > + *   flag (name and options).
> > + * @return
> > + *   The number of the reserved bit, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (size, align, or flags).
> > + *   - EEXIST: this name is already registered with different parameters.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: no more flag available.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE -
> > 1.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
> > +
> > +/**
> > + * Register a dynamic flag in the mbuf structure specifying bitnum.
> > + *
> > + * If the flag is already registered (same name, parameters and
> > +bitnum),
> > + * the bitnum is returned.
> > + *
> > + * @param params
> > + *   A structure containing the requested parameters of the dynamic
> > + *   flag (name and options).
> > + * @param bitnum
> > + *   The requested bitnum. Ignored if UINT_MAX is passed.
> > + * @return
> > + *   The number of the reserved bit, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - EINVAL: invalid parameters (flags or bitnum).
> > + *   - EEXIST: this name is already registered with different parameters.
> > + *   - EBUSY: the requested bitnum cannot be used.
> > + *   - EPERM: called from a secondary process.
> > + *   - ENOENT: no more flag available.
> > + *   - ENOMEM: allocation failure.
> > + *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE -
> > 1.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag
> > *params,
> > +				unsigned int bitnum);
> > +
> > +/**
> > + * Lookup for a registered dynamic mbuf flag.
> > + *
> > + * @param name
> > + *   A string identifying the dynamic flag.
> > + * @param params
> > + *   If not NULL, and if the lookup is successful, the structure is
> > + *   filled with the parameters of the dynamic flag.
> > + * @return
> > + *   The bit number of this flag in mbuf->ol_flags, or -1 on error.
> > + *   Possible values for rte_errno:
> > + *   - ENOENT: no dynamic flag matches this name.
> > + */
> > +__rte_experimental
> > +int rte_mbuf_dynflag_lookup(const char *name,
> > +			struct rte_mbuf_dynflag *params);
> > +
> > +/**
> > + * Helper macro to access a dynamic field.
> > + */
> > +#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) +
> > +(offset)))
> > +
> > +/**
> > + * Dump the status of dynamic fields and flags.
> > + *
> > + * @param out
> > + *   The stream where the status is displayed.
> > + */
> > +__rte_experimental
> > +void rte_mbuf_dyn_dump(FILE *out);
> > +
> > +/* Placeholder for dynamic fields and flags declarations. */
> > +
> > +#endif
> > diff --git a/lib/librte_mbuf/rte_mbuf_version.map
> > b/lib/librte_mbuf/rte_mbuf_version.map
> > index 519fead35..9bf5ca37a 100644
> > --- a/lib/librte_mbuf/rte_mbuf_version.map
> > +++ b/lib/librte_mbuf/rte_mbuf_version.map
> > @@ -58,6 +58,13 @@ EXPERIMENTAL {
> >  	global:
> > 
> >  	rte_mbuf_check;
> > +	rte_mbuf_dynfield_lookup;
> > +	rte_mbuf_dynfield_register;
> > +	rte_mbuf_dynfield_register_offset;
> > +	rte_mbuf_dynflag_lookup;
> > +	rte_mbuf_dynflag_register;
> > +	rte_mbuf_dynflag_register_bitnum;
> > +	rte_mbuf_dyn_dump;
> >  	rte_pktmbuf_copy;
> > 
> >  } DPDK_18.08;
> > --
> > 2.20.1
> 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [dpdk-dev] [PATCH v3] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
                   ` (5 preceding siblings ...)
  2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
@ 2019-10-24  8:13 ` Olivier Matz
  2019-10-24 15:30   ` Stephen Hemminger
  2019-10-24 16:40   ` Thomas Monjalon
  2019-10-26 12:39 ` [dpdk-dev] [PATCH v4] " Olivier Matz
  7 siblings, 2 replies; 64+ messages in thread
From: Olivier Matz @ 2019-10-24  8:13 UTC (permalink / raw)
  To: dev
  Cc: Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Shahaf Shuler, Stephen Hemminger,
	Thomas Monjalon, Slava Ovsiienko

Many features require storing data inside the mbuf. As the room in the mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
  given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e. when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---

v3

* define mark_free() macro outside the init_shared_mem() function
  (Konstantin)
* better document automatic field placement (Konstantin)
* introduce RTE_SIZEOF_FIELD() to get the size of a field in
  a structure (Haiyue)
* fix api doc generation (Slava)
* document dynamic field and flags naming conventions

v2

* Rebase on top of master: solve conflict with Stephen's patchset
  (packet copy)
* Add new apis to register a dynamic field/flag at a specific place
* Add a dump function (sugg by David)
* Enhance field registration function to select the best offset, keeping
  large aligned zones as much as possible (sugg by Konstantin)
* Use a size_t and unsigned int instead of int when relevant
  (sugg by Konstantin)
* Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
  (sugg by Konstantin)
* Remove unused argument in private function (sugg by Konstantin)
* Fix and simplify locking (sugg by Konstantin)
* Fix minor typo

rfc -> v1

* Rebase on top of master
* Change registration API to use a structure instead of
  variables, getting rid of #defines (Stephen's comment)
* Update flag registration to use a similar API as fields.
* Change max name length from 32 to 64 (sugg. by Thomas)
* Enhance API documentation (Haiyue's and Andrew's comments)
* Add a debug log at registration
* Add some words in release note
* Did some performance tests (sugg. by Andrew):
  On my platform, reading a dynamic field takes ~3 cycles more
  than a static field, and ~2 cycles more for writing.


 app/test/test_mbuf.c                       | 145 +++++-
 doc/guides/rel_notes/release_19_11.rst     |   7 +
 lib/librte_eal/common/include/rte_common.h |  12 +
 lib/librte_mbuf/Makefile                   |   2 +
 lib/librte_mbuf/meson.build                |   6 +-
 lib/librte_mbuf/rte_mbuf.h                 |  23 +-
 lib/librte_mbuf/rte_mbuf_dyn.c             | 553 +++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf_dyn.h             | 239 +++++++++
 lib/librte_mbuf/rte_mbuf_version.map       |   7 +
 9 files changed, 989 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index c21ef64c8..e9be430af 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -29,6 +29,7 @@
 #include <rte_random.h>
 #include <rte_cycles.h>
 #include <rte_malloc.h>
+#include <rte_mbuf_dyn.h>
 
 #include "test.h"
 
@@ -661,7 +662,6 @@ test_attach_from_different_pool(struct rte_mempool *pktmbuf_pool,
 		rte_pktmbuf_free(clone2);
 	return -1;
 }
-#undef GOTO_FAIL
 
 /*
  * test allocation and free of mbufs
@@ -1449,6 +1449,143 @@ test_tx_offload(void)
 	return (v1 == v2) ? 0 : -EINVAL;
 }
 
+static int
+test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
+{
+	const struct rte_mbuf_dynfield dynfield = {
+		.name = "test-dynfield",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield2 = {
+		.name = "test-dynfield2",
+		.size = sizeof(uint16_t),
+		.align = __alignof__(uint16_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield3 = {
+		.name = "test-dynfield3",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_big = {
+		.name = "test-dynfield-fail-big",
+		.size = 256,
+		.align = 1,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_align = {
+		.name = "test-dynfield-fail-align",
+		.size = 1,
+		.align = 3,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag = {
+		.name = "test-dynflag",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag2 = {
+		.name = "test-dynflag2",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag3 = {
+		.name = "test-dynflag3",
+		.flags = 0,
+	};
+	struct rte_mbuf *m = NULL;
+	int offset, offset2, offset3;
+	int flag, flag2, flag3;
+	int ret;
+
+	printf("Test mbuf dynamic fields and flags\n");
+	rte_mbuf_dyn_dump(stdout);
+
+	offset = rte_mbuf_dynfield_register(&dynfield);
+	if (offset == -1)
+		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
+			offset, strerror(errno));
+
+	ret = rte_mbuf_dynfield_register(&dynfield);
+	if (ret != offset)
+		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
+			ret, strerror(errno));
+
+	offset2 = rte_mbuf_dynfield_register(&dynfield2);
+	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
+		GOTO_FAIL("failed to register dynamic field 2, offset2=%d: %s",
+			offset2, strerror(errno));
+
+	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
+				offsetof(struct rte_mbuf, dynfield1[1]));
+	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
+		GOTO_FAIL("failed to register dynamic field 3, offset=%d: %s",
+			offset3, strerror(errno));
+
+	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
+		offset, offset2, offset3);
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (too big)");
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
+
+	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_align,
+				offsetof(struct rte_mbuf, ol_flags));
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (not avail)");
+
+	flag = rte_mbuf_dynflag_register(&dynflag);
+	if (flag == -1)
+		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
+			flag, strerror(errno));
+
+	ret = rte_mbuf_dynflag_register(&dynflag);
+	if (ret != flag)
+		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
+			ret, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register(&dynflag2);
+	if (flag2 == -1 || flag2 == flag)
+		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
+			flag2, strerror(errno));
+
+	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
+						rte_bsf64(PKT_LAST_FREE));
+	if (flag3 != rte_bsf64(PKT_LAST_FREE))
+		GOTO_FAIL("failed to register dynamic flag 3, flag2=%d: %s",
+			flag3, strerror(errno));
+
+	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
+
+	/* set, get dynamic field */
+	m = rte_pktmbuf_alloc(pktmbuf_pool);
+	if (m == NULL)
+		GOTO_FAIL("Cannot allocate mbuf");
+
+	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
+	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
+		GOTO_FAIL("failed to read dynamic field");
+	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
+	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
+		GOTO_FAIL("failed to read dynamic field");
+
+	/* set a dynamic flag */
+	m->ol_flags |= (1ULL << flag);
+
+	rte_mbuf_dyn_dump(stdout);
+	rte_pktmbuf_free(m);
+	return 0;
+fail:
+	rte_pktmbuf_free(m);
+	return -1;
+}
+#undef GOTO_FAIL
+
 static int
 test_mbuf(void)
 {
@@ -1468,6 +1605,12 @@ test_mbuf(void)
 		goto err;
 	}
 
+	/* test registration of dynamic fields and flags */
+	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
+		printf("mbuf dynflag test failed\n");
+		goto err;
+	}
+
 	/* create a specific pktmbuf pool with a priv_size != 0 and no data
 	 * room size */
 	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 856088c5c..b7511a6dc 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -21,6 +21,13 @@ DPDK Release 19.11
 
       xdg-open build/doc/html/guides/rel_notes/release_19_11.html
 
+* **Add support for dynamic fields and flags in mbuf.**
+
+  This new feature adds the ability to dynamically register some room
+  for a field or a flag in the mbuf structure. This is typically used
+  for specific offload features, where adding a static field or flag
+  in the mbuf is not justified.
+
 
 New Features
 ------------
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 05a3a6401..6660c77e4 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -630,6 +630,18 @@ rte_log2_u64(uint64_t v)
 		})
 #endif
 
+/**
+ * Get the size of a field in a structure.
+ *
+ * @param type
+ *   The type of the structure.
+ * @param field
+ *   The field in the structure.
+ * @return
+ *   The size of the field in the structure, in bytes.
+ */
+#define RTE_SIZEOF_FIELD(type, field) (sizeof(((type *)0)->field))
+
 #define _RTE_STR(x) #x
 /** Take a macro value and get a string version of it */
 #define RTE_STR(x) _RTE_STR(x)
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index c8f6d2689..5a9bcee73 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -17,8 +17,10 @@ LIBABIVER := 5
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h rte_mbuf_pool_ops.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 6cc11ebb4..9137e8f26 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,8 +2,10 @@
 # Copyright(c) 2017 Intel Corporation
 
 version = 5
-sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
-headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
+sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
+	'rte_mbuf_dyn.c')
+headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
+	'rte_mbuf_dyn.h')
 deps += ['mempool']
 
 allow_experimental_apis = true
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index b1a92b17a..7567b6ff3 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -198,9 +198,12 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
 
-/* add new RX flags here */
+/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-/* add new TX flags here */
+#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_LAST_FREE (1ULL << 39)
+
+/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
 
 /**
  * Indicate that the metadata field in the mbuf is in use.
@@ -738,6 +741,7 @@ struct rte_mbuf {
 	 */
 	struct rte_mbuf_ext_shared_info *shinfo;
 
+	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
 } __rte_cache_aligned;
 
 /**
@@ -1684,6 +1688,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
  */
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
+/**
+ * Copy dynamic fields from msrc to mdst.
+ *
+ * @param mdst
+ *   The destination mbuf.
+ * @param msrc
+ *   The source mbuf.
+ */
+static inline void
+rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
+{
+	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
+}
+
 /* internal */
 static inline void
 __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
@@ -1695,6 +1713,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
 	mdst->hash = msrc->hash;
 	mdst->packet_type = msrc->packet_type;
 	mdst->timestamp = msrc->timestamp;
+	rte_mbuf_dynfield_copy(mdst, msrc);
 }
 
 /**
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
new file mode 100644
index 000000000..d6931f847
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.c
@@ -0,0 +1,553 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
+
+#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
+
+struct mbuf_dynfield_elt {
+	TAILQ_ENTRY(mbuf_dynfield_elt) next;
+	struct rte_mbuf_dynfield params;
+	size_t offset;
+};
+TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynfield_tailq = {
+	.name = "RTE_MBUF_DYNFIELD",
+};
+EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
+
+struct mbuf_dynflag_elt {
+	TAILQ_ENTRY(mbuf_dynflag_elt) next;
+	struct rte_mbuf_dynflag params;
+	unsigned int bitnum;
+};
+TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
+
+static struct rte_tailq_elem mbuf_dynflag_tailq = {
+	.name = "RTE_MBUF_DYNFLAG",
+};
+EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
+
+struct mbuf_dyn_shm {
+	/**
+	 * For each mbuf byte, free_space[i] != 0 if space is free.
+	 * The value is the size of the biggest aligned element that
+	 * can fit in the zone.
+	 */
+	uint8_t free_space[sizeof(struct rte_mbuf)];
+	/** Bitfield of available flags. */
+	uint64_t free_flags;
+};
+static struct mbuf_dyn_shm *shm;
+
+/* Set the value of free_space[] according to the size and alignment of
+ * the free areas. This helps to select the best place when reserving a
+ * dynamic field. Assume tailq is locked.
+ */
+static void
+process_score(void)
+{
+	size_t off, align, size, i;
+
+	/* first, erase previous info */
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if (shm->free_space[i])
+			shm->free_space[i] = 1;
+	}
+
+	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
+		/* get the size of the free zone */
+		for (size = 0; shm->free_space[off + size]; size++)
+			;
+		if (size == 0)
+			continue;
+
+		/* get the alignment of biggest object that can fit in
+		 * the zone at this offset.
+		 */
+		for (align = 1;
+		     (off % (align << 1)) == 0 && (align << 1) <= size;
+		     align <<= 1)
+			;
+
+		/* save it in free_space[] */
+		for (i = off; i < off + size; i++)
+			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
+	}
+}
+
+/* Mark the area occupied by a mbuf field as available in the shm. */
+#define mark_free(field)						\
+	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
+		1, sizeof(((struct rte_mbuf *)0)->field))
+
+/* Allocate and initialize the shared memory. Assume tailq is locked */
+static int
+init_shared_mem(void)
+{
+	const struct rte_memzone *mz;
+	uint64_t mask;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
+						sizeof(struct mbuf_dyn_shm),
+						SOCKET_ID_ANY, 0,
+						RTE_CACHE_LINE_SIZE);
+	} else {
+		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
+	}
+	if (mz == NULL)
+		return -1;
+
+	shm = mz->addr;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* init free_space, keep it sync'd with
+		 * rte_mbuf_dynfield_copy().
+		 */
+		memset(shm, 0, sizeof(*shm));
+		mark_free(dynfield1);
+
+		/* init free_flags */
+		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
+			shm->free_flags |= mask;
+
+		process_score();
+	}
+
+	return 0;
+}
+
+/* check if this offset can be used */
+static int
+check_offset(size_t offset, size_t size, size_t align)
+{
+	size_t i;
+
+	if ((offset & (align - 1)) != 0)
+		return -1;
+	if (offset + size > sizeof(struct rte_mbuf))
+		return -1;
+
+	for (i = 0; i < size; i++) {
+		if (!shm->free_space[i + offset])
+			return -1;
+	}
+
+	return 0;
+}
+
+/* assume tailq is locked */
+static struct mbuf_dynfield_elt *
+__mbuf_dynfield_lookup(const char *name)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+	struct rte_tailq_entry *te;
+
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
+		if (strcmp(name, mbuf_dynfield->params.name) == 0)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mbuf_dynfield;
+}
+
+int
+rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
+{
+	struct mbuf_dynfield_elt *mbuf_dynfield;
+
+	if (shm == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	rte_mcfg_tailq_read_lock();
+	mbuf_dynfield = __mbuf_dynfield_lookup(name);
+	rte_mcfg_tailq_read_unlock();
+
+	if (mbuf_dynfield == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
+
+	return mbuf_dynfield->offset;
+}
+
+/*
+ * Compare two dynamic field descriptions.
+ * Returns 0 when name, size, align and flags are all equal, else -1.
+ */
+static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
+		const struct rte_mbuf_dynfield *params2)
+{
+	const int same =
+		strcmp(params1->name, params2->name) == 0 &&
+		params1->size == params2->size &&
+		params1->align == params2->align &&
+		params1->flags == params2->flags;
+
+	return same ? 0 : -1;
+}
+
+/*
+ * Register a dynamic field, or return the offset of an identical field
+ * that is already registered. The tailq must be locked by the caller.
+ *
+ * params: description of the field (name, size, align, flags).
+ * req: requested offset, or SIZE_MAX to let this function pick one.
+ *
+ * Returns the offset of the field, or -1 on error. rte_errno is set to
+ * EEXIST, EPERM, ENOENT, EBUSY, ENOMEM or ENAMETOOLONG for the failures
+ * detected here; a failure of init_shared_mem() is returned as is.
+ */
+static int
+__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int best_zone = UINT_MAX;
+	size_t i, offset;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
+	if (mbuf_dynfield != NULL) {
+		/* The name is known: succeed only if the caller describes
+		 * the very same field (and the same offset, if one is
+		 * requested).
+		 */
+		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynfield->offset;
+	}
+
+	/* new fields can only be created by the primary process */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == SIZE_MAX) {
+		/* Find the best place to put this field: we search the
+		 * lowest value of shm->free_space[offset]: the zones
+		 * containing room for larger fields are kept for later.
+		 */
+		for (offset = 0;
+		     offset < sizeof(struct rte_mbuf);
+		     offset++) {
+			if (check_offset(offset, params->size,
+						params->align) == 0 &&
+					shm->free_space[offset] < best_zone) {
+				best_zone = shm->free_space[offset];
+				req = offset;
+			}
+		}
+		/* req is unchanged if no offset was suitable */
+		if (req == SIZE_MAX) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+	} else {
+		if (check_offset(req, params->size, params->align) < 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+	}
+
+	offset = req;
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
+	if (mbuf_dynfield == NULL) {
+		rte_free(te);
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	/* this copy is used to detect a name that does not fit */
+	ret = strlcpy(mbuf_dynfield->params.name, params->name,
+		sizeof(mbuf_dynfield->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
+		rte_errno = ENAMETOOLONG;
+		rte_free(mbuf_dynfield);
+		rte_free(te);
+		return -1;
+	}
+	/* store the whole description (the name is copied again) */
+	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
+	mbuf_dynfield->offset = offset;
+	te->data = mbuf_dynfield;
+
+	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
+
+	/* mark the bytes of the new field as used */
+	for (i = offset; i < offset + params->size; i++)
+		shm->free_space[i] = 0;
+	process_score();
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zu\n",
+		params->name, params->size, params->align, params->flags,
+		offset);
+
+	return offset;
+}
+
+/*
+ * Validate the field description, then register it at the requested
+ * offset (SIZE_MAX: any offset) while holding the tailq write lock.
+ * Returns the offset, or -1 with rte_errno set.
+ */
+int
+rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	int offset;
+
+	/* size must be smaller than the mbuf, align a power of 2, and
+	 * flags (reserved) must be 0
+	 */
+	if (params->size >= sizeof(struct rte_mbuf) ||
+			!rte_is_power_of_2(params->align) ||
+			params->flags != 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	offset = __rte_mbuf_dynfield_register_offset(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return offset;
+}
+
+/* Register a dynamic field without requesting a specific offset. */
+int
+rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
+{
+	/* SIZE_MAX stands for "no offset requested" */
+	const size_t any_offset = SIZE_MAX;
+
+	return rte_mbuf_dynfield_register_offset(params, any_offset);
+}
+
+/*
+ * Find a registered dynamic flag by name. The tailq must be locked by
+ * the caller.
+ *
+ * Returns the matching element, or NULL with rte_errno set to ENOENT
+ * when no registered flag has this name.
+ */
+static struct mbuf_dynflag_elt *
+__mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag_list *list;
+	struct mbuf_dynflag_elt *elt;
+	struct rte_tailq_entry *entry;
+
+	list = RTE_TAILQ_CAST(mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	TAILQ_FOREACH(entry, list, next) {
+		elt = (struct mbuf_dynflag_elt *)entry->data;
+		if (strncmp(name, elt->params.name,
+				RTE_MBUF_DYN_NAMESIZE) == 0)
+			return elt;
+	}
+
+	/* reached the end of the list without a match */
+	rte_errno = ENOENT;
+	return NULL;
+}
+
+/*
+ * Look up a dynamic flag by name. If found and params is not NULL, the
+ * stored description is copied to params. Returns the bit number of the
+ * flag, or -1 with rte_errno set to ENOENT.
+ */
+int
+rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params)
+{
+	struct mbuf_dynflag_elt *elt = NULL;
+
+	/* no shared memory means nothing was registered from this process */
+	if (shm != NULL) {
+		rte_mcfg_tailq_read_lock();
+		elt = __mbuf_dynflag_lookup(name);
+		rte_mcfg_tailq_read_unlock();
+	}
+
+	if (elt == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	if (params != NULL)
+		memcpy(params, &elt->params, sizeof(*params));
+
+	return elt->bitnum;
+}
+
+/*
+ * Compare two dynamic flag descriptions.
+ * Returns 0 when name and flags are both equal, else -1.
+ */
+static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
+		const struct rte_mbuf_dynflag *params2)
+{
+	const int same =
+		strcmp(params1->name, params2->name) == 0 &&
+		params1->flags == params2->flags;
+
+	return same ? 0 : -1;
+}
+
+/*
+ * Register a dynamic flag, or return the bit number of an identical
+ * flag that is already registered. The tailq must be locked by the
+ * caller.
+ *
+ * params: description of the flag (name, flags).
+ * req: requested bit number, or UINT_MAX to let this function pick one.
+ *      The shift below assumes the caller already rejected bit numbers
+ *      that do not fit in the 64-bit free_flags mask.
+ *
+ * Returns the bit number, or -1 on error. rte_errno is set to EEXIST,
+ * EPERM, ENOENT, EBUSY, ENOMEM or ENAMETOOLONG for the failures
+ * detected here; a failure of init_shared_mem() is returned as is.
+ */
+static int
+__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int bitnum;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
+	if (mbuf_dynflag != NULL) {
+		/* The name is known: succeed only if the caller describes
+		 * the very same flag (and the same bit, if one is
+		 * requested).
+		 */
+		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynflag->bitnum;
+	}
+
+	/* new flags can only be created by the primary process */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == UINT_MAX) {
+		if (shm->free_flags == 0) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+		/* take the lowest free bit */
+		bitnum = rte_bsf64(shm->free_flags);
+	} else {
+		if ((shm->free_flags & (1ULL << req)) == 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+		bitnum = req;
+	}
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
+	if (mbuf_dynflag == NULL) {
+		rte_free(te);
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	ret = strlcpy(mbuf_dynflag->params.name, params->name,
+		sizeof(mbuf_dynflag->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
+		rte_free(mbuf_dynflag);
+		rte_free(te);
+		rte_errno = ENAMETOOLONG;
+		return -1;
+	}
+	/* Keep the flags too, so that registering the same description
+	 * again compares equal and a lookup returns what was registered.
+	 */
+	mbuf_dynflag->params.flags = params->flags;
+	mbuf_dynflag->bitnum = bitnum;
+	te->data = mbuf_dynflag;
+
+	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
+
+	/* this bit is not free anymore */
+	shm->free_flags &= ~(1ULL << bitnum);
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
+		params->name, params->flags, bitnum);
+
+	return bitnum;
+}
+
+/*
+ * Validate the parameters, then register the dynamic flag at the
+ * requested bit (UINT_MAX: any free bit) while holding the tailq write
+ * lock. Returns the bit number, or -1 with rte_errno set.
+ */
+int
+rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	int ret;
+
+	/* flags is reserved for future use and must be 0, as it is
+	 * enforced for dynamic fields
+	 */
+	if (params->flags != 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	/* a requested bit must exist in ol_flags */
+	if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
+			req != UINT_MAX) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return ret;
+}
+
+/* Register a dynamic flag without requesting a specific bit number. */
+int
+rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
+{
+	/* UINT_MAX stands for "no bit number requested" */
+	const unsigned int any_bit = UINT_MAX;
+
+	return rte_mbuf_dynflag_register_bitnum(params, any_bit);
+}
+
+/*
+ * Print the registered dynamic fields, the registered dynamic flags and
+ * the per-byte free space map of the mbuf to the stream out.
+ *
+ * Nothing is printed if the shared memory cannot be initialized.
+ */
+void rte_mbuf_dyn_dump(FILE *out)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *dynfield;
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *dynflag;
+	struct rte_tailq_entry *te;
+	size_t i;
+
+	rte_mcfg_tailq_write_lock();
+	/* Same guard as the registration functions: initialize only once,
+	 * and give up on failure since shm->free_space is read below.
+	 */
+	if (shm == NULL && init_shared_mem() < 0) {
+		rte_mcfg_tailq_write_unlock();
+		return;
+	}
+	fprintf(out, "Reserved fields:\n");
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		dynfield = (struct mbuf_dynfield_elt *)te->data;
+		fprintf(out, "  name=%s offset=%zu size=%zu align=%zu flags=%x\n",
+			dynfield->params.name, dynfield->offset,
+			dynfield->params.size, dynfield->params.align,
+			dynfield->params.flags);
+	}
+	fprintf(out, "Reserved flags:\n");
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
+		dynflag = (struct mbuf_dynflag_elt *)te->data;
+		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
+			dynflag->params.name, dynflag->bitnum,
+			dynflag->params.flags);
+	}
+	/* a byte is set to 0 in free_space when it is not available */
+	fprintf(out, "Free space in mbuf (0 = occupied, value = free zone alignment):\n");
+	/* 8 bytes per line, each line prefixed by its offset in hex */
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if ((i % 8) == 0)
+			fprintf(out, "  %4.4zx: ", i);
+		fprintf(out, "%2.2x%s", shm->free_space[i],
+			(i % 8 != 7) ? " " : "\n");
+	}
+	rte_mcfg_tailq_write_unlock();
+}
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
new file mode 100644
index 000000000..2e9d418cf
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#ifndef _RTE_MBUF_DYN_H_
+#define _RTE_MBUF_DYN_H_
+
+/**
+ * @file
+ * RTE Mbuf dynamic fields and flags
+ *
+ * Many DPDK features require to store data inside the mbuf. As the room
+ * in mbuf structure is limited, it is not possible to have a field for
+ * each feature. Also, changing fields in the mbuf structure can break
+ * the API or ABI.
+ *
+ * This module addresses this issue, by enabling the dynamic
+ * registration of fields or flags:
+ *
+ * - a dynamic field is a named area in the rte_mbuf structure, with a
+ *   given size (>= 1 byte) and alignment constraint.
+ * - a dynamic flag is a named bit in the rte_mbuf structure, stored
+ *   in mbuf->ol_flags.
+ *
+ * The placement of the field or flag can be automatic, in this case the
+ * zones that have the smallest size and alignment constraint are
+ * selected in priority. Else, a specific field offset or flag bit
+ * number can be requested through the API.
+ *
+ * The typical use case is when a specific offload feature requires to
+ * register a dedicated offload field in the mbuf structure, and adding
+ * a static field or flag is not justified.
+ *
+ * Example of use:
+ *
+ * - A rte_mbuf_dynfield structure is defined, containing the parameters
+ *   of the dynamic field to be registered:
+ *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
+ * - The application initializes the PMD, and asks for this feature
+ *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
+ *   rxconf. This makes the PMD register the field by calling
+ *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
+ *   stores the returned offset.
+ * - The application that uses the offload feature also registers
+ *   the field to retrieve the same offset.
+ * - When the PMD receives a packet, it can set the field:
+ *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
+ * - In the main loop, the application can retrieve the value with
+ *   the same macro.
+ *
+ * To avoid wasting space, the dynamic fields or flags must only be
+ * reserved on demand, when an application asks for the related feature.
+ *
+ * The registration can be done at any moment, but it is not possible
+ * to unregister fields or flags for now.
+ *
+ * A dynamic field can be reserved and used by an application only.
+ * It can for instance be a packet mark.
+ *
+ * To avoid namespace collisions, the dynamic mbuf field or flag names
+ * have to be chosen with care. It is advised to use the same
+ * conventions as function names in DPDK:
+ * - "rte_mbuf_dynfield_<name>" if defined in mbuf library
+ * - "rte_<libname>_dynfield_<name>" if defined in another library
+ * - "rte_net_<pmd>_dynfield_<name>" if defined in a PMD
+ * - any name that does not start with "rte_" in an application
+ */
+
+#include <sys/types.h>
+/**
+ * Maximum length of the dynamic field or flag string.
+ */
+#define RTE_MBUF_DYN_NAMESIZE 64
+
+/**
+ * Structure describing the parameters of a mbuf dynamic field.
+ */
+struct rte_mbuf_dynfield {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
+	size_t size;        /**< The number of bytes to reserve. */
+	size_t align;       /**< The alignment constraint (power of 2). */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Structure describing the parameters of a mbuf dynamic flag.
+ */
+struct rte_mbuf_dynflag {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Register space for a dynamic field in the mbuf structure.
+ *
+ * If the field is already registered (same name and parameters), its
+ * offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, or flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
+
+/**
+ * Register space for a dynamic field in the mbuf structure at offset.
+ *
+ * If the field is already registered (same name, parameters and offset),
+ * the offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @param offset
+ *   The requested offset. Ignored if SIZE_MAX is passed.
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, flags, or offset).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested offset cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t offset);
+
+/**
+ * Lookup for a registered dynamic mbuf field.
+ *
+ * @param name
+ *   A string identifying the dynamic field.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic field.
+ * @return
+ *   The offset of this field in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic field matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_lookup(const char *name,
+			struct rte_mbuf_dynfield *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure.
+ *
+ * If the flag is already registered (same name and parameters), its
+ * bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, or flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure specifying bitnum.
+ *
+ * If the flag is already registered (same name, parameters and bitnum),
+ * the bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @param bitnum
+ *   The requested bitnum. Ignored if UINT_MAX is passed.
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (such as an out-of-range bitnum).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested bitnum cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int bitnum);
+
+/**
+ * Lookup for a registered dynamic mbuf flag.
+ *
+ * @param name
+ *   A string identifying the dynamic flag.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic flag.
+ * @return
+ *   The bit number of this flag in mbuf->ol_flags, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic flag matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params);
+
+/**
+ * Helper macro to access to a dynamic field.
+ */
+#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
+
+/**
+ * Dump the status of dynamic fields and flags.
+ *
+ * @param out
+ *   The stream where the status is displayed.
+ */
+__rte_experimental
+void rte_mbuf_dyn_dump(FILE *out);
+
+/* Placeholder for dynamic fields and flags declarations. */
+
+#endif
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index a4f41d7fd..263dc0a21 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -58,6 +58,13 @@ EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_mbuf_dynfield_lookup;
+	rte_mbuf_dynfield_register;
+	rte_mbuf_dynfield_register_offset;
+	rte_mbuf_dynflag_lookup;
+	rte_mbuf_dynflag_register;
+	rte_mbuf_dynflag_register_bitnum;
+	rte_mbuf_dyn_dump;
 	rte_pktmbuf_copy;
 	rte_pktmbuf_free_bulk;
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v3] mbuf: support dynamic fields and flags
  2019-10-24  8:13 ` [dpdk-dev] [PATCH v3] " Olivier Matz
@ 2019-10-24 15:30   ` Stephen Hemminger
  2019-10-24 15:44     ` Thomas Monjalon
  2019-10-24 16:40   ` Thomas Monjalon
  1 sibling, 1 reply; 64+ messages in thread
From: Stephen Hemminger @ 2019-10-24 15:30 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Shahaf Shuler, Thomas Monjalon,
	Slava Ovsiienko

On Thu, 24 Oct 2019 10:13:37 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

>  
> +/**
> + * Copy dynamic fields from msrc to mdst.
> + *
> + * @param mdst
> + *   The destination mbuf.
> + * @param msrc
> + *   The source mbuf.
> + */
> +static inline void
> +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> +{
> +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
> +}

Since size is fixed and both are of same type, why not just assign structures?

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v3] mbuf: support dynamic fields and flags
  2019-10-24 15:30   ` Stephen Hemminger
@ 2019-10-24 15:44     ` Thomas Monjalon
  2019-10-24 17:07       ` Stephen Hemminger
  0 siblings, 1 reply; 64+ messages in thread
From: Thomas Monjalon @ 2019-10-24 15:44 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Olivier Matz, Andrew Rybchenko, Bruce Richardson, Wang,
	Haiyue, Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev,
	Konstantin, Morten Brørup, Shahaf Shuler, Slava Ovsiienko

24/10/2019 17:30, Stephen Hemminger:
> On Thu, 24 Oct 2019 10:13:37 +0200
> Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> > +/**
> > + * Copy dynamic fields from msrc to mdst.
> > + *
> > + * @param mdst
> > + *   The destination mbuf.
> > + * @param msrc
> > + *   The source mbuf.
> > + */
> > +static inline void
> > +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> > +{
> > +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
> > +}
> 
> Since size is fixed and both are of same type, why not just assign structures?

Because they are not structures?
	uint64_t dynfield1[2];



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v3] mbuf: support dynamic fields and flags
  2019-10-24  8:13 ` [dpdk-dev] [PATCH v3] " Olivier Matz
  2019-10-24 15:30   ` Stephen Hemminger
@ 2019-10-24 16:40   ` Thomas Monjalon
  1 sibling, 0 replies; 64+ messages in thread
From: Thomas Monjalon @ 2019-10-24 16:40 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Shahaf Shuler, Stephen Hemminger,
	Slava Ovsiienko

24/10/2019 10:13, Olivier Matz:
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

I feel I could merge this patch.
I will hold on for few hours and will proceed.




^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v3] mbuf: support dynamic fields and flags
  2019-10-24 15:44     ` Thomas Monjalon
@ 2019-10-24 17:07       ` Stephen Hemminger
  0 siblings, 0 replies; 64+ messages in thread
From: Stephen Hemminger @ 2019-10-24 17:07 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Olivier Matz, Andrew Rybchenko, Bruce Richardson, Wang,
	Haiyue, Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev,
	Konstantin, Morten Brørup, Shahaf Shuler, Slava Ovsiienko

On Thu, 24 Oct 2019 17:44:09 +0200
Thomas Monjalon <thomas@monjalon.net> wrote:

> 24/10/2019 17:30, Stephen Hemminger:
> > On Thu, 24 Oct 2019 10:13:37 +0200
> > Olivier Matz <olivier.matz@6wind.com> wrote:
> >   
> > > +/**
> > > + * Copy dynamic fields from msrc to mdst.
> > > + *
> > > + * @param mdst
> > > + *   The destination mbuf.
> > > + * @param msrc
> > > + *   The source mbuf.
> > > + */
> > > +static inline void
> > > +rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
> > > +{
> > > +	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
> > > +}  
> > 
> > Since size is fixed and both are of same type, why not just assign structures?  
> 
> Because they are not structures?
> 	uint64_t dynfield1[2];

your right. and gcc will emit simple stores for this.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [dpdk-dev] [PATCH v4] mbuf: support dynamic fields and flags
  2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
                   ` (6 preceding siblings ...)
  2019-10-24  8:13 ` [dpdk-dev] [PATCH v3] " Olivier Matz
@ 2019-10-26 12:39 ` Olivier Matz
  2019-10-26 17:04   ` Thomas Monjalon
  7 siblings, 1 reply; 64+ messages in thread
From: Olivier Matz @ 2019-10-26 12:39 UTC (permalink / raw)
  To: dev
  Cc: Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Shahaf Shuler, Stephen Hemminger,
	Thomas Monjalon, Slava Ovsiienko

Many features require to store data inside the mbuf. As the room in mbuf
structure is limited, it is not possible to have a field for each
feature. Also, changing fields in the mbuf structure can break the API
or ABI.

This commit addresses these issues, by enabling the dynamic registration
of fields or flags:

- a dynamic field is a named area in the rte_mbuf structure, with a
  given size (>= 1 byte) and alignment constraint.
- a dynamic flag is a named bit in the rte_mbuf structure.

The typical use case is a PMD that registers space for an offload
feature, when the application requests to enable this feature.  As
the space in mbuf is limited, the space should only be reserved if it
is going to be used (i.e when the application explicitly asks for it).

The registration can be done at any moment, but it is not possible
to unregister fields or flags.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---

v4

* rebase and solve conflicts

v3

* define mark_free() macro outside the init_shared_mem() function
  (Konstantin)
* better document automatic field placement (Konstantin)
* introduce RTE_SIZEOF_FIELD() to get the size of a field in
  a structure (Haiyue)
* fix api doc generation (Slava)
* document dynamic field and flags naming conventions

v2

* Rebase on top of master: solve conflict with Stephen's patchset
  (packet copy)
* Add new apis to register a dynamic field/flag at a specific place
* Add a dump function (sugg by David)
* Enhance field registration function to select the best offset, keeping
  large aligned zones as much as possible (sugg by Konstantin)
* Use a size_t and unsigned int instead of int when relevant
  (sugg by Konstantin)
* Use "uint64_t dynfield1[2]" in mbuf instead of 2 uint64_t fields
  (sugg by Konstantin)
* Remove unused argument in private function (sugg by Konstantin)
* Fix and simplify locking (sugg by Konstantin)
* Fix minor typo

rfc -> v1

* Rebase on top of master
* Change registration API to use a structure instead of
  variables, getting rid of #defines (Stephen's comment)
* Update flag registration to use a similar API as fields.
* Change max name length from 32 to 64 (sugg. by Thomas)
* Enhance API documentation (Haiyue's and Andrew's comments)
* Add a debug log at registration
* Add some words in release note
* Did some performance tests (sugg. by Andrew):
  On my platform, reading a dynamic field takes ~3 cycles more
  than a static field, and ~2 cycles more for writing.

 app/test/test_mbuf.c                       | 143 ++++++
 doc/guides/rel_notes/release_19_11.rst     |   7 +
 lib/librte_eal/common/include/rte_common.h |  12 +
 lib/librte_mbuf/Makefile                   |   2 +
 lib/librte_mbuf/meson.build                |   6 +-
 lib/librte_mbuf/rte_mbuf.h                 |  15 +
 lib/librte_mbuf/rte_mbuf_core.h            |   8 +-
 lib/librte_mbuf/rte_mbuf_dyn.c             | 553 +++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf_dyn.h             | 239 +++++++++
 lib/librte_mbuf/rte_mbuf_version.map       |   7 +
 10 files changed, 988 insertions(+), 4 deletions(-)
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.c
 create mode 100644 lib/librte_mbuf/rte_mbuf_dyn.h

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 9fea312c8..854bc26d8 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -32,6 +32,7 @@
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
+#include <rte_mbuf_dyn.h>
 
 #include "test.h"
 
@@ -2411,6 +2412,142 @@ test_pktmbuf_ext_shinfo_init_helper(struct rte_mempool *pktmbuf_pool)
 	return -1;
 }
 
+/*
+ * Test the registration of dynamic fields and flags: automatic and
+ * explicit placement, registering the same description twice, expected
+ * failures (too big, bad alignment, unavailable offset), then a
+ * write/read of the registered fields and flag in an mbuf taken from
+ * pktmbuf_pool.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+test_mbuf_dyn(struct rte_mempool *pktmbuf_pool)
+{
+	const struct rte_mbuf_dynfield dynfield = {
+		.name = "test-dynfield",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield2 = {
+		.name = "test-dynfield2",
+		.size = sizeof(uint16_t),
+		.align = __alignof__(uint16_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield3 = {
+		.name = "test-dynfield3",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_big = {
+		.name = "test-dynfield-fail-big",
+		.size = 256,
+		.align = 1,
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynfield dynfield_fail_align = {
+		.name = "test-dynfield-fail-align",
+		.size = 1,
+		.align = 3,
+		.flags = 0,
+	};
+	/* valid description: only the requested offset makes it fail */
+	const struct rte_mbuf_dynfield dynfield_fail_offset = {
+		.name = "test-dynfield-fail-offset",
+		.size = sizeof(uint8_t),
+		.align = __alignof__(uint8_t),
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag = {
+		.name = "test-dynflag",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag2 = {
+		.name = "test-dynflag2",
+		.flags = 0,
+	};
+	const struct rte_mbuf_dynflag dynflag3 = {
+		.name = "test-dynflag3",
+		.flags = 0,
+	};
+	struct rte_mbuf *m = NULL;
+	int offset, offset2, offset3;
+	int flag, flag2, flag3;
+	int ret;
+
+	printf("Test mbuf dynamic fields and flags\n");
+	rte_mbuf_dyn_dump(stdout);
+
+	offset = rte_mbuf_dynfield_register(&dynfield);
+	if (offset == -1)
+		GOTO_FAIL("failed to register dynamic field, offset=%d: %s",
+			offset, strerror(errno));
+
+	/* registering the same description again returns the same offset */
+	ret = rte_mbuf_dynfield_register(&dynfield);
+	if (ret != offset)
+		GOTO_FAIL("failed to lookup dynamic field, ret=%d: %s",
+			ret, strerror(errno));
+
+	/* a 2-byte field must get a distinct, even offset */
+	offset2 = rte_mbuf_dynfield_register(&dynfield2);
+	if (offset2 == -1 || offset2 == offset || (offset2 & 1))
+		GOTO_FAIL("failed to register dynamic field 2, offset2=%d: %s",
+			offset2, strerror(errno));
+
+	offset3 = rte_mbuf_dynfield_register_offset(&dynfield3,
+				offsetof(struct rte_mbuf, dynfield1[1]));
+	if (offset3 != offsetof(struct rte_mbuf, dynfield1[1]))
+		GOTO_FAIL("failed to register dynamic field 3, offset=%d: %s",
+			offset3, strerror(errno));
+
+	printf("dynfield: offset=%d, offset2=%d, offset3=%d\n",
+		offset, offset2, offset3);
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_big);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (too big)");
+
+	ret = rte_mbuf_dynfield_register(&dynfield_fail_align);
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (bad alignment)");
+
+	/* ol_flags is a regular mbuf field: its offset cannot be reserved */
+	ret = rte_mbuf_dynfield_register_offset(&dynfield_fail_offset,
+				offsetof(struct rte_mbuf, ol_flags));
+	if (ret != -1)
+		GOTO_FAIL("dynamic field creation should fail (not avail)");
+
+	flag = rte_mbuf_dynflag_register(&dynflag);
+	if (flag == -1)
+		GOTO_FAIL("failed to register dynamic flag, flag=%d: %s",
+			flag, strerror(errno));
+
+	/* registering the same flag again returns the same bit number */
+	ret = rte_mbuf_dynflag_register(&dynflag);
+	if (ret != flag)
+		GOTO_FAIL("failed to lookup dynamic flag, ret=%d: %s",
+			ret, strerror(errno));
+
+	flag2 = rte_mbuf_dynflag_register(&dynflag2);
+	if (flag2 == -1 || flag2 == flag)
+		GOTO_FAIL("failed to register dynamic flag 2, flag2=%d: %s",
+			flag2, strerror(errno));
+
+	flag3 = rte_mbuf_dynflag_register_bitnum(&dynflag3,
+						rte_bsf64(PKT_LAST_FREE));
+	if (flag3 != rte_bsf64(PKT_LAST_FREE))
+		GOTO_FAIL("failed to register dynamic flag 3, flag3=%d: %s",
+			flag3, strerror(errno));
+
+	printf("dynflag: flag=%d, flag2=%d, flag3=%d\n", flag, flag2, flag3);
+
+	/* set, get dynamic field */
+	m = rte_pktmbuf_alloc(pktmbuf_pool);
+	if (m == NULL)
+		GOTO_FAIL("Cannot allocate mbuf");
+
+	*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) = 1;
+	if (*RTE_MBUF_DYNFIELD(m, offset, uint8_t *) != 1)
+		GOTO_FAIL("failed to read dynamic field");
+	*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) = 1000;
+	if (*RTE_MBUF_DYNFIELD(m, offset2, uint16_t *) != 1000)
+		GOTO_FAIL("failed to read dynamic field");
+
+	/* set a dynamic flag */
+	m->ol_flags |= (1ULL << flag);
+
+	rte_mbuf_dyn_dump(stdout);
+	rte_pktmbuf_free(m);
+	return 0;
+fail:
+	rte_pktmbuf_free(m);
+	return -1;
+}
+
 static int
 test_mbuf(void)
 {
@@ -2431,6 +2568,12 @@ test_mbuf(void)
 		goto err;
 	}
 
+	/* test registration of dynamic fields and flags */
+	if (test_mbuf_dyn(pktmbuf_pool) < 0) {
+		printf("mbuf dynflag test failed\n");
+		goto err;
+	}
+
 	/* create a specific pktmbuf pool with a priv_size != 0 and no data
 	 * room size */
 	pktmbuf_pool2 = rte_pktmbuf_pool_create("test_pktmbuf_pool2",
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 2b4cbe6e3..603d618a5 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -21,6 +21,13 @@ DPDK Release 19.11
 
       xdg-open build/doc/html/guides/rel_notes/release_19_11.html
 
+* **Add support of dynamic fields and flags in mbuf.**
+
+  This new feature adds the ability to dynamically register some room
+  for a field or a flag in the mbuf structure. This is typically used
+  for specific offload features, where adding a static field or flag
+  in the mbuf is not justified.
+
 
 New Features
 ------------
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 7ee94d698..459d082d1 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -675,6 +675,18 @@ rte_log2_u64(uint64_t v)
 		})
 #endif
 
+/**
+ * Get the size of a field in a structure.
+ *
+ * @param type
+ *   The type of the structure.
+ * @param field
+ *   The field in the structure.
+ * @return
+ *   The size of the field in the structure, in bytes.
+ */
+#define RTE_SIZEOF_FIELD(type, field) (sizeof(((type *)0)->field))
+
 #define _RTE_STR(x) #x
 /** Take a macro value and get a string version of it */
 #define RTE_STR(x) _RTE_STR(x)
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index f3b76ad23..019c8dd8f 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -17,11 +17,13 @@ LIBABIVER := 5
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) += rte_mbuf_dyn.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_core.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_ptype.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_pool_ops.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include += rte_mbuf_dyn.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 36bb6eb9d..59fd07224 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,9 +2,11 @@
 # Copyright(c) 2017 Intel Corporation
 
 version = 5
-sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
+sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c',
+	'rte_mbuf_dyn.c')
 headers = files('rte_mbuf.h', 'rte_mbuf_core.h',
-		'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
+		'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h',
+		'rte_mbuf_dyn.h')
 deps += ['mempool']
 
 allow_experimental_apis = true
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index bd26764a2..92d81972a 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1000,6 +1000,20 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
  */
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
+/**
+ * Duplicate the dynamic field area of a mbuf.
+ *
+ * The whole dynfield1 area of msrc is copied over the one of mdst.
+ *
+ * @param mdst
+ *   The mbuf receiving the copy.
+ * @param msrc
+ *   The mbuf whose dynamic fields are read.
+ */
+static inline void
+rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
+{
+	memcpy(mdst->dynfield1, msrc->dynfield1, sizeof(msrc->dynfield1));
+}
+
 /* internal */
 static inline void
 __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
@@ -1011,6 +1025,7 @@ __rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
 	mdst->hash = msrc->hash;
 	mdst->packet_type = msrc->packet_type;
 	mdst->timestamp = msrc->timestamp;
+	rte_mbuf_dynfield_copy(mdst, msrc);
 }
 
 /**
diff --git a/lib/librte_mbuf/rte_mbuf_core.h b/lib/librte_mbuf/rte_mbuf_core.h
index 3398c12c8..302270146 100644
--- a/lib/librte_mbuf/rte_mbuf_core.h
+++ b/lib/librte_mbuf/rte_mbuf_core.h
@@ -184,9 +184,12 @@ extern "C" {
 #define PKT_RX_OUTER_L4_CKSUM_GOOD	(1ULL << 22)
 #define PKT_RX_OUTER_L4_CKSUM_INVALID	((1ULL << 21) | (1ULL << 22))
 
-/* add new RX flags here */
+/* add new RX flags here, don't forget to update PKT_FIRST_FREE */
 
-/* add new TX flags here */
+#define PKT_FIRST_FREE (1ULL << 23)
+#define PKT_LAST_FREE (1ULL << 39)
+
+/* add new TX flags here, don't forget to update PKT_LAST_FREE  */
 
 /**
  * Indicate that the metadata field in the mbuf is in use.
@@ -689,6 +692,7 @@ struct rte_mbuf {
 	 */
 	struct rte_mbuf_ext_shared_info *shinfo;
 
+	uint64_t dynfield1[2]; /**< Reserved for dynamic fields. */
 } __rte_cache_aligned;
 
 /**
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
new file mode 100644
index 000000000..d6931f847
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.c
@@ -0,0 +1,553 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
+
+#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
+
+/* Description of one registered dynamic field. An element is stored as
+ * the data pointer of a rte_tailq_entry of the RTE_MBUF_DYNFIELD tailq.
+ */
+struct mbuf_dynfield_elt {
+	TAILQ_ENTRY(mbuf_dynfield_elt) next;
+	struct rte_mbuf_dynfield params; /* parameters given at registration */
+	size_t offset; /* offset of the field in struct rte_mbuf */
+};
+TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
+
+/* tailq holding the registered dynamic fields */
+static struct rte_tailq_elem mbuf_dynfield_tailq = {
+	.name = "RTE_MBUF_DYNFIELD",
+};
+EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
+
+/* Description of one registered dynamic flag. An element is stored as
+ * the data pointer of a rte_tailq_entry of the RTE_MBUF_DYNFLAG tailq.
+ */
+struct mbuf_dynflag_elt {
+	TAILQ_ENTRY(mbuf_dynflag_elt) next;
+	struct rte_mbuf_dynflag params; /* parameters given at registration */
+	unsigned int bitnum; /* bit number reserved for this flag */
+};
+TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
+
+/* tailq holding the registered dynamic flags */
+static struct rte_tailq_elem mbuf_dynflag_tailq = {
+	.name = "RTE_MBUF_DYNFLAG",
+};
+EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
+
+/* Bookkeeping of the free bytes and free flag bits. It lives in the
+ * RTE_MBUF_DYN_MZNAME memzone, reserved by the primary process and
+ * looked up by the other ones, and is reached through the shm pointer.
+ */
+struct mbuf_dyn_shm {
+	/**
+	 * For each mbuf byte, free_space[i] != 0 if space is free.
+	 * The value is the size of the biggest aligned element that
+	 * can fit in the zone.
+	 */
+	uint8_t free_space[sizeof(struct rte_mbuf)];
+	/** Bitfield of available flags. */
+	uint64_t free_flags;
+};
+/* NULL until init_shared_mem() has succeeded in this process. */
+static struct mbuf_dyn_shm *shm;
+
+/* Set the value of free_space[] according to the size and alignment of
+ * the free areas. This helps to select the best place when reserving a
+ * dynamic field. Assume tailq is locked.
+ *
+ * Occupied bytes stay at 0. A free byte ends up holding the highest
+ * alignment score computed for a free run that covers it.
+ */
+static void
+process_score(void)
+{
+	size_t off, align, size, i;
+
+	/* first, erase previous info: only keep the free/occupied state */
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if (shm->free_space[i])
+			shm->free_space[i] = 1;
+	}
+
+	for (off = 0; off < sizeof(struct rte_mbuf); off++) {
+		/* get the size of the free zone; the index is bounded so
+		 * that a zone reaching the end of the mbuf does not make
+		 * us read past the end of free_space[]
+		 */
+		for (size = 0;
+		     (off + size) < sizeof(struct rte_mbuf) &&
+		     shm->free_space[off + size];
+		     size++)
+			;
+		if (size == 0)
+			continue;
+
+		/* get the alignment of biggest object that can fit in
+		 * the zone at this offset.
+		 */
+		for (align = 1;
+		     (off % (align << 1)) == 0 && (align << 1) <= size;
+		     align <<= 1)
+			;
+
+		/* save it in free_space[], without lowering a score already
+		 * set while processing a lower offset
+		 */
+		for (i = off; i < off + size; i++)
+			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
+	}
+}
+
+/* Flag every byte of the given rte_mbuf member as free (value 1) in
+ * shm->free_space[].
+ */
+#define mark_free(field)						\
+	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
+		1, RTE_SIZEOF_FIELD(struct rte_mbuf, field))
+
+/* Bind shm to the shared bookkeeping area. The primary process reserves
+ * the memzone and initializes the free map and the free flags; any
+ * other process only looks the memzone up. Return 0 on success, -1 if
+ * the memzone cannot be obtained. Assume tailq is locked.
+ */
+static int
+init_shared_mem(void)
+{
+	const int primary = (rte_eal_process_type() == RTE_PROC_PRIMARY);
+	const struct rte_memzone *mz;
+	uint64_t bit;
+
+	mz = primary ?
+		rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
+					sizeof(struct mbuf_dyn_shm),
+					SOCKET_ID_ANY, 0,
+					RTE_CACHE_LINE_SIZE) :
+		rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
+	if (mz == NULL)
+		return -1;
+
+	shm = mz->addr;
+	if (!primary)
+		return 0;
+
+	/* init free_space, keep it sync'd with
+	 * rte_mbuf_dynfield_copy().
+	 */
+	memset(shm, 0, sizeof(*shm));
+	mark_free(dynfield1);
+
+	/* init free_flags: all bits from PKT_FIRST_FREE to PKT_LAST_FREE */
+	for (bit = PKT_FIRST_FREE; bit <= PKT_LAST_FREE; bit <<= 1)
+		shm->free_flags |= bit;
+
+	process_score();
+
+	return 0;
+}
+
+/* Check if this offset can be used: it must honour the alignment, the
+ * [offset, offset + size) range must lie inside struct rte_mbuf and all
+ * its bytes must be free. Return 0 if usable, -1 otherwise.
+ * align is expected to be a power of 2 (checked by the caller).
+ */
+static int
+check_offset(size_t offset, size_t size, size_t align)
+{
+	size_t i;
+
+	if ((offset & (align - 1)) != 0)
+		return -1;
+	/* compare without computing offset + size, which could wrap
+	 * around for a huge requested offset and defeat the check
+	 */
+	if (offset > sizeof(struct rte_mbuf) ||
+			size > sizeof(struct rte_mbuf) - offset)
+		return -1;
+
+	for (i = 0; i < size; i++) {
+		if (!shm->free_space[i + offset])
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Find a registered dynamic field by name. Return its element, or NULL
+ * with rte_errno set to ENOENT if no field has this name.
+ * Assume tailq is locked.
+ */
+static struct mbuf_dynfield_elt *
+__mbuf_dynfield_lookup(const char *name)
+{
+	struct mbuf_dynfield_list *list;
+	struct mbuf_dynfield_elt *elt;
+	struct rte_tailq_entry *entry;
+
+	list = RTE_TAILQ_CAST(mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	TAILQ_FOREACH(entry, list, next) {
+		elt = (struct mbuf_dynfield_elt *)entry->data;
+		if (strcmp(name, elt->params.name) == 0)
+			return elt;
+	}
+
+	rte_errno = ENOENT;
+	return NULL;
+}
+
+/* Public lookup of a dynamic field by name, see rte_mbuf_dyn.h. */
+int
+rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
+{
+	struct mbuf_dynfield_elt *elt = NULL;
+
+	/* the list is only searched once shm is set up in this process */
+	if (shm != NULL) {
+		rte_mcfg_tailq_read_lock();
+		elt = __mbuf_dynfield_lookup(name);
+		rte_mcfg_tailq_read_unlock();
+	}
+
+	if (elt == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	/* optionally hand the registered parameters back to the caller */
+	if (params != NULL)
+		memcpy(params, &elt->params, sizeof(*params));
+
+	return elt->offset;
+}
+
+/* Compare two dynamic field descriptions (name, size, align, flags).
+ * Return 0 if they are identical, -1 otherwise.
+ */
+static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
+		const struct rte_mbuf_dynfield *params2)
+{
+	if (strcmp(params1->name, params2->name) != 0 ||
+			params1->size != params2->size ||
+			params1->align != params2->align ||
+			params1->flags != params2->flags)
+		return -1;
+
+	return 0;
+}
+
+/* Register a dynamic field, or return the offset of an identical field
+ * already registered under the same name.
+ *
+ * req is the requested offset, or SIZE_MAX to let this function pick
+ * the usable offset whose free_space[] score is the lowest.
+ * Return the offset of the field, or -1 on error; the failures detected
+ * here set rte_errno to EEXIST, EPERM, ENOENT, EBUSY, ENOMEM or
+ * ENAMETOOLONG. Assume tailq is locked.
+ */
+static int
+__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int best_zone = UINT_MAX;
+	size_t i, offset;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	/* name already known: only an identical request succeeds */
+	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
+	if (mbuf_dynfield != NULL) {
+		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynfield->offset;
+	}
+
+	/* a new field can only be created by the primary process */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == SIZE_MAX) {
+		/* Find the best place to put this field: we search the
+		 * lowest value of shm->free_space[offset]: the zones
+		 * containing room for larger fields are kept for later.
+		 */
+		for (offset = 0;
+		     offset < sizeof(struct rte_mbuf);
+		     offset++) {
+			if (check_offset(offset, params->size,
+						params->align) == 0 &&
+					shm->free_space[offset] < best_zone) {
+				best_zone = shm->free_space[offset];
+				req = offset;
+			}
+		}
+		if (req == SIZE_MAX) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+	} else {
+		if (check_offset(req, params->size, params->align) < 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+	}
+
+	offset = req;
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+
+	/* report allocation failures as ENOMEM, as documented in the API */
+	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
+	if (mbuf_dynfield == NULL) {
+		rte_free(te);
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	/* the copy is only used to detect a name that does not fit */
+	ret = strlcpy(mbuf_dynfield->params.name, params->name,
+		sizeof(mbuf_dynfield->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
+		rte_errno = ENAMETOOLONG;
+		rte_free(mbuf_dynfield);
+		rte_free(te);
+		return -1;
+	}
+	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
+	mbuf_dynfield->offset = offset;
+	te->data = mbuf_dynfield;
+
+	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
+
+	/* mark the bytes as occupied and refresh the scores */
+	for (i = offset; i < offset + params->size; i++)
+		shm->free_space[i] = 0;
+	process_score();
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zu\n",
+		params->name, params->size, params->align, params->flags,
+		offset);
+
+	return offset;
+}
+
+/* Public registration of a dynamic field at a given offset, see
+ * rte_mbuf_dyn.h. Parameters are validated before taking the tailq lock.
+ */
+int
+rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t req)
+{
+	int offset;
+
+	/* size must be smaller than the mbuf, align a power of 2 and the
+	 * reserved flags must be 0
+	 */
+	if (params->size >= sizeof(struct rte_mbuf) ||
+			!rte_is_power_of_2(params->align) ||
+			params->flags != 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	offset = __rte_mbuf_dynfield_register_offset(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return offset;
+}
+
+/* Register a dynamic field without requesting a specific offset:
+ * SIZE_MAX asks the registration code to select the offset itself.
+ */
+int
+rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
+{
+	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
+}
+
+/* Find a registered dynamic flag by name. Return its element, or NULL
+ * with rte_errno set to ENOENT if no flag has this name.
+ * Assume tailq is locked.
+ */
+static struct mbuf_dynflag_elt *
+__mbuf_dynflag_lookup(const char *name)
+{
+	struct mbuf_dynflag_list *list;
+	struct mbuf_dynflag_elt *elt;
+	struct rte_tailq_entry *entry;
+
+	list = RTE_TAILQ_CAST(mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	TAILQ_FOREACH(entry, list, next) {
+		elt = (struct mbuf_dynflag_elt *)entry->data;
+		if (strncmp(name, elt->params.name,
+				RTE_MBUF_DYN_NAMESIZE) == 0)
+			return elt;
+	}
+
+	rte_errno = ENOENT;
+	return NULL;
+}
+
+/* Public lookup of a dynamic flag by name, see rte_mbuf_dyn.h. */
+int
+rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params)
+{
+	struct mbuf_dynflag_elt *elt = NULL;
+
+	/* the list is only searched once shm is set up in this process */
+	if (shm != NULL) {
+		rte_mcfg_tailq_read_lock();
+		elt = __mbuf_dynflag_lookup(name);
+		rte_mcfg_tailq_read_unlock();
+	}
+
+	if (elt == NULL) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	/* optionally hand the registered parameters back to the caller */
+	if (params != NULL)
+		memcpy(params, &elt->params, sizeof(*params));
+
+	return elt->bitnum;
+}
+
+/* Compare two dynamic flag descriptions (name, flags).
+ * Return 0 if they are identical, -1 otherwise.
+ */
+static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
+		const struct rte_mbuf_dynflag *params2)
+{
+	if (strcmp(params1->name, params2->name) != 0 ||
+			params1->flags != params2->flags)
+		return -1;
+
+	return 0;
+}
+
+/* Register a dynamic flag, or return the bit number of an identical
+ * flag already registered under the same name.
+ *
+ * req is the requested bit number, or UINT_MAX to take the lowest bit
+ * still set in shm->free_flags.
+ * Return the bit number, or -1 on error; the failures detected here set
+ * rte_errno to EEXIST, EPERM, ENOENT, EBUSY, ENOMEM or ENAMETOOLONG.
+ * Assume tailq is locked.
+ */
+static int
+__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
+	struct rte_tailq_entry *te = NULL;
+	unsigned int bitnum;
+	int ret;
+
+	if (shm == NULL && init_shared_mem() < 0)
+		return -1;
+
+	/* name already known: only an identical request succeeds */
+	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
+	if (mbuf_dynflag != NULL) {
+		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
+			rte_errno = EEXIST;
+			return -1;
+		}
+		return mbuf_dynflag->bitnum;
+	}
+
+	/* a new flag can only be created by the primary process */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		rte_errno = EPERM;
+		return -1;
+	}
+
+	if (req == UINT_MAX) {
+		if (shm->free_flags == 0) {
+			rte_errno = ENOENT;
+			return -1;
+		}
+		bitnum = rte_bsf64(shm->free_flags);
+	} else {
+		if ((shm->free_flags & (1ULL << req)) == 0) {
+			rte_errno = EBUSY;
+			return -1;
+		}
+		bitnum = req;
+	}
+
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+
+	/* report allocation failures as ENOMEM, as documented in the API */
+	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
+	if (mbuf_dynflag == NULL) {
+		rte_free(te);
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	ret = strlcpy(mbuf_dynflag->params.name, params->name,
+		sizeof(mbuf_dynflag->params.name));
+	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
+		rte_free(mbuf_dynflag);
+		rte_free(te);
+		rte_errno = ENAMETOOLONG;
+		return -1;
+	}
+	/* keep the flags too, so that a later lookup or re-registration
+	 * sees the parameters that were actually given
+	 */
+	mbuf_dynflag->params.flags = params->flags;
+	mbuf_dynflag->bitnum = bitnum;
+	te->data = mbuf_dynflag;
+
+	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
+
+	/* the bit is not available anymore */
+	shm->free_flags &= ~(1ULL << bitnum);
+
+	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
+		params->name, params->flags, bitnum);
+
+	return bitnum;
+}
+
+/* Public registration of a dynamic flag at a given bit number, see
+ * rte_mbuf_dyn.h. Parameters are validated before taking the tailq lock.
+ */
+int
+rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int req)
+{
+	int ret;
+
+	/* flags are reserved and must be 0, like for dynamic fields */
+	if (params->flags != 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	/* the bit must exist in ol_flags, UINT_MAX meaning "any bit" */
+	if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
+			req != UINT_MAX) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	rte_mcfg_tailq_write_lock();
+	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
+	rte_mcfg_tailq_write_unlock();
+
+	return ret;
+}
+
+/* Register a dynamic flag without requesting a specific bit:
+ * UINT_MAX asks the registration code to select the bit itself.
+ */
+int
+rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
+{
+	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX);
+}
+
+/* Print the registered dynamic fields, the registered dynamic flags and
+ * the free_space[] map on out. Nothing is printed if the shared memory
+ * cannot be set up.
+ */
+void rte_mbuf_dyn_dump(FILE *out)
+{
+	struct mbuf_dynfield_list *mbuf_dynfield_list;
+	struct mbuf_dynfield_elt *dynfield;
+	struct mbuf_dynflag_list *mbuf_dynflag_list;
+	struct mbuf_dynflag_elt *dynflag;
+	struct rte_tailq_entry *te;
+	size_t i;
+
+	rte_mcfg_tailq_write_lock();
+	/* shm is dereferenced below: give up if it cannot be initialized,
+	 * and do not initialize it again when it is already set
+	 */
+	if (shm == NULL && init_shared_mem() < 0) {
+		rte_mcfg_tailq_write_unlock();
+		return;
+	}
+
+	fprintf(out, "Reserved fields:\n");
+	mbuf_dynfield_list = RTE_TAILQ_CAST(
+		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
+	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
+		dynfield = (struct mbuf_dynfield_elt *)te->data;
+		fprintf(out, "  name=%s offset=%zu size=%zu align=%zu flags=%x\n",
+			dynfield->params.name, dynfield->offset,
+			dynfield->params.size, dynfield->params.align,
+			dynfield->params.flags);
+	}
+	fprintf(out, "Reserved flags:\n");
+	mbuf_dynflag_list = RTE_TAILQ_CAST(
+		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
+	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
+		dynflag = (struct mbuf_dynflag_elt *)te->data;
+		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
+			dynflag->params.name, dynflag->bitnum,
+			dynflag->params.flags);
+	}
+	/* one line per group of 8 mbuf bytes, prefixed by the offset */
+	fprintf(out, "Free space in mbuf (0 = occupied, value = free zone alignment):\n");
+	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
+		if ((i % 8) == 0)
+			fprintf(out, "  %4.4zx: ", i);
+		fprintf(out, "%2.2x%s", shm->free_space[i],
+			(i % 8 != 7) ? " " : "\n");
+	}
+	rte_mcfg_tailq_write_unlock();
+}
diff --git a/lib/librte_mbuf/rte_mbuf_dyn.h b/lib/librte_mbuf/rte_mbuf_dyn.h
new file mode 100644
index 000000000..2e9d418cf
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_dyn.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ */
+
+#ifndef _RTE_MBUF_DYN_H_
+#define _RTE_MBUF_DYN_H_
+
+/**
+ * @file
+ * RTE Mbuf dynamic fields and flags
+ *
+ * Many DPDK features require to store data inside the mbuf. As the room
+ * in mbuf structure is limited, it is not possible to have a field for
+ * each feature. Also, changing fields in the mbuf structure can break
+ * the API or ABI.
+ *
+ * This module addresses this issue, by enabling the dynamic
+ * registration of fields or flags:
+ *
+ * - a dynamic field is a named area in the rte_mbuf structure, with a
+ *   given size (>= 1 byte) and alignment constraint.
+ * - a dynamic flag is a named bit in the rte_mbuf structure, stored
+ *   in mbuf->ol_flags.
+ *
+ * The placement of the field or flag can be automatic, in this case the
+ * zones that have the smallest size and alignment constraint are
+ * selected in priority. Else, a specific field offset or flag bit
+ * number can be requested through the API.
+ *
+ * The typical use case is when a specific offload feature requires to
+ * register a dedicated offload field in the mbuf structure, and adding
+ * a static field or flag is not justified.
+ *
+ * Example of use:
+ *
+ * - A rte_mbuf_dynfield structure is defined, containing the parameters
+ *   of the dynamic field to be registered:
+ *   const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
+ * - The application initializes the PMD, and asks for this feature
+ *   at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
+ *   rxconf. This makes the PMD register the field by calling
+ *   rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
+ *   stores the returned offset.
+ * - The application that uses the offload feature also registers
+ *   the field to retrieve the same offset.
+ * - When the PMD receives a packet, it can set the field:
+ *   *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
+ * - In the main loop, the application can retrieve the value with
+ *   the same macro.
+ *
+ * To avoid wasting space, the dynamic fields or flags must only be
+ * reserved on demand, when an application asks for the related feature.
+ *
+ * The registration can be done at any moment, but it is not possible
+ * to unregister fields or flags for now.
+ *
+ * A dynamic field can be reserved and used by an application only.
+ * It can for instance be a packet mark.
+ *
+ * To avoid namespace collisions, the dynamic mbuf field or flag names
+ * have to be chosen with care. It is advised to use the same
+ * conventions as function names in DPDK:
+ * - "rte_mbuf_dynfield_<name>" if defined in mbuf library
+ * - "rte_<libname>_dynfield_<name>" if defined in another library
+ * - "rte_net_<pmd>_dynfield_<name>" if defined in a PMD
+ * - any name that does not start with "rte_" in an application
+ */
+
+#include <sys/types.h>
+/**
+ * Maximum length of the dynamic field or flag string.
+ */
+#define RTE_MBUF_DYN_NAMESIZE 64
+
+/**
+ * Structure describing the parameters of a mbuf dynamic field.
+ */
+struct rte_mbuf_dynfield {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field. */
+	size_t size;        /**< The number of bytes to reserve. */
+	size_t align;       /**< The alignment constraint (power of 2). */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Structure describing the parameters of a mbuf dynamic flag.
+ */
+struct rte_mbuf_dynflag {
+	char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag. */
+	unsigned int flags; /**< Reserved for future use, must be 0. */
+};
+
+/**
+ * Register space for a dynamic field in the mbuf structure.
+ *
+ * If the field is already registered (same name and parameters), its
+ * offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, or flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
+
+/**
+ * Register space for a dynamic field in the mbuf structure at offset.
+ *
+ * If the field is already registered (same name, parameters and offset),
+ * the offset is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters (name, size,
+ *   alignment constraint and flags).
+ * @param offset
+ *   The requested offset. Ignored if SIZE_MAX is passed.
+ * @return
+ *   The offset in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (size, align, flags, or offset).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested offset cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: not enough room in mbuf.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name does not end with \0.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
+				size_t offset);
+
+/**
+ * Lookup for a registered dynamic mbuf field.
+ *
+ * @param name
+ *   A string identifying the dynamic field.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic field.
+ * @return
+ *   The offset of this field in the mbuf structure, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic field matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynfield_lookup(const char *name,
+			struct rte_mbuf_dynfield *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure.
+ *
+ * If the flag is already registered (same name and parameters), its
+ * bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (flags).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
+
+/**
+ * Register a dynamic flag in the mbuf structure specifying bitnum.
+ *
+ * If the flag is already registered (same name, parameters and bitnum),
+ * the bitnum is returned.
+ *
+ * @param params
+ *   A structure containing the requested parameters of the dynamic
+ *   flag (name and options).
+ * @param bitnum
+ *   The requested bitnum. Ignored if UINT_MAX is passed.
+ * @return
+ *   The number of the reserved bit, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - EINVAL: invalid parameters (flags or bitnum).
+ *   - EEXIST: this name is already registered with different parameters.
+ *   - EBUSY: the requested bitnum cannot be used.
+ *   - EPERM: called from a secondary process.
+ *   - ENOENT: no more flag available.
+ *   - ENOMEM: allocation failure.
+ *   - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
+				unsigned int bitnum);
+
+/**
+ * Lookup for a registered dynamic mbuf flag.
+ *
+ * @param name
+ *   A string identifying the dynamic flag.
+ * @param params
+ *   If not NULL, and if the lookup is successful, the structure is
+ *   filled with the parameters of the dynamic flag.
+ * @return
+ *   The bit number of this flag in mbuf->ol_flags, or -1 on error.
+ *   Possible values for rte_errno:
+ *   - ENOENT: no dynamic flag matches this name.
+ */
+__rte_experimental
+int rte_mbuf_dynflag_lookup(const char *name,
+			struct rte_mbuf_dynflag *params);
+
+/**
+ * Helper macro to access a dynamic field.
+ */
+#define RTE_MBUF_DYNFIELD(m, offset, type) ((type)((uintptr_t)(m) + (offset)))
+
+/**
+ * Dump the status of dynamic fields and flags.
+ *
+ * @param out
+ *   The stream where the status is displayed.
+ */
+__rte_experimental
+void rte_mbuf_dyn_dump(FILE *out);
+
+/* Placeholder for dynamic fields and flags declarations. */
+
+#endif
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index a4f41d7fd..263dc0a21 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -58,6 +58,13 @@ EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_mbuf_dynfield_lookup;
+	rte_mbuf_dynfield_register;
+	rte_mbuf_dynfield_register_offset;
+	rte_mbuf_dynflag_lookup;
+	rte_mbuf_dynflag_register;
+	rte_mbuf_dynflag_register_bitnum;
+	rte_mbuf_dyn_dump;
 	rte_pktmbuf_copy;
 	rte_pktmbuf_free_bulk;
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [dpdk-dev] [PATCH v4] mbuf: support dynamic fields and flags
  2019-10-26 12:39 ` [dpdk-dev] [PATCH v4] " Olivier Matz
@ 2019-10-26 17:04   ` Thomas Monjalon
  0 siblings, 0 replies; 64+ messages in thread
From: Thomas Monjalon @ 2019-10-26 17:04 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Andrew Rybchenko, Bruce Richardson, Wang, Haiyue,
	Jerin Jacob Kollanukkaran, Wiles, Keith, Ananyev, Konstantin,
	Morten Brørup, Shahaf Shuler, Stephen Hemminger,
	Slava Ovsiienko

26/10/2019 14:39, Olivier Matz:
> Many features require to store data inside the mbuf. As the room in mbuf
> structure is limited, it is not possible to have a field for each
> feature. Also, changing fields in the mbuf structure can break the API
> or ABI.
> 
> This commit addresses these issues, by enabling the dynamic registration
> of fields or flags:
> 
> - a dynamic field is a named area in the rte_mbuf structure, with a
>   given size (>= 1 byte) and alignment constraint.
> - a dynamic flag is a named bit in the rte_mbuf structure.
> 
> The typical use case is a PMD that registers space for an offload
> feature, when the application requests to enable this feature.  As
> the space in mbuf is limited, the space should only be reserved if it
> is going to be used (i.e when the application explicitly asks for it).
> 
> The registration can be done at any moment, but it is not possible
> to unregister fields or flags.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

Applied, thanks, this is a new major feature.




^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2019-10-26 17:04 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-10  9:29 [dpdk-dev] [RFC] mbuf: support dynamic fields and flags Olivier Matz
2019-07-10 17:14 ` Wang, Haiyue
2019-07-11  7:26   ` Olivier Matz
2019-07-11  8:04     ` Wang, Haiyue
2019-07-11  8:20       ` Olivier Matz
2019-07-11  8:34         ` Wang, Haiyue
2019-07-11 15:31     ` Stephen Hemminger
2019-07-12  9:18       ` Olivier Matz
2019-07-10 17:49 ` Stephen Hemminger
2019-07-10 18:12   ` Wiles, Keith
2019-07-11  7:53     ` Olivier Matz
2019-07-11 14:37       ` Wiles, Keith
2019-07-12  9:06         ` Olivier Matz
2019-07-11  7:36   ` Olivier Matz
2019-07-12 12:23     ` Jerin Jacob Kollanukkaran
2019-07-16  9:39       ` Olivier Matz
2019-07-16 14:43         ` Stephen Hemminger
2019-07-11  9:24 ` Thomas Monjalon
2019-07-12 14:54 ` Andrew Rybchenko
2019-07-16  9:49   ` Olivier Matz
2019-07-16 11:31     ` [dpdk-dev] ***Spam*** " Andrew Rybchenko
2019-09-18 16:54 ` [dpdk-dev] [PATCH] " Olivier Matz
2019-09-21  4:54   ` Wang, Haiyue
2019-09-23  8:31     ` Olivier Matz
2019-09-23 11:01       ` Wang, Haiyue
2019-09-21  8:28   ` Wiles, Keith
2019-09-23  8:56     ` Morten Brørup
2019-09-23  9:41       ` Olivier Matz
2019-09-23  9:13     ` Olivier Matz
2019-09-23 15:14       ` Wiles, Keith
2019-09-23 16:16         ` Olivier Matz
2019-09-23 17:14           ` Wiles, Keith
2019-09-23 16:09       ` Wiles, Keith
2019-10-01 10:49   ` Ananyev, Konstantin
2019-10-17  7:54     ` Olivier Matz
2019-10-17 11:58       ` Ananyev, Konstantin
2019-10-17 12:58         ` Olivier Matz
2019-10-17 14:42 ` [dpdk-dev] [PATCH v2] " Olivier Matz
2019-10-18  2:47   ` Wang, Haiyue
2019-10-18  7:53     ` Olivier Matz
2019-10-18  8:28       ` Wang, Haiyue
2019-10-18  9:47         ` Olivier Matz
2019-10-18 11:24           ` Wang, Haiyue
2019-10-22 22:51   ` Ananyev, Konstantin
2019-10-23  3:16     ` Wang, Haiyue
2019-10-23 10:21       ` Olivier Matz
2019-10-23 15:00         ` Stephen Hemminger
2019-10-23 15:12           ` Wang, Haiyue
2019-10-23 10:19     ` Olivier Matz
2019-10-23 11:45       ` Olivier Matz
2019-10-23 11:49         ` Ananyev, Konstantin
2019-10-23 12:00   ` Shahaf Shuler
2019-10-23 13:33     ` Olivier Matz
2019-10-24  4:54       ` Shahaf Shuler
2019-10-24  7:07         ` Olivier Matz
2019-10-24  7:38   ` Slava Ovsiienko
2019-10-24  7:56     ` Olivier Matz
2019-10-24  8:13 ` [dpdk-dev] [PATCH v3] " Olivier Matz
2019-10-24 15:30   ` Stephen Hemminger
2019-10-24 15:44     ` Thomas Monjalon
2019-10-24 17:07       ` Stephen Hemminger
2019-10-24 16:40   ` Thomas Monjalon
2019-10-26 12:39 ` [dpdk-dev] [PATCH v4] " Olivier Matz
2019-10-26 17:04   ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).