DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
@ 2014-09-29 10:15 Marc Sune
  2014-10-09  6:01 ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-09-29 10:15 UTC (permalink / raw)
  To: dev

This patch implements the KNI memzone pool in order to:

* prevent memzone exhaustion when allocating/deallocating KNI
  interfaces.
* be able to allocate KNI interfaces with the same name as
  previously deallocated ones.

It adds a new API call, rte_kni_init(max_kni_ifaces) that shall
be called before any call to rte_kni_alloc() if KNI is used.

Signed-off-by: Marc Sune <marc.sune@bisdn.de>
---
 lib/librte_kni/rte_kni.c |  302 ++++++++++++++++++++++++++++++++++++++--------
 lib/librte_kni/rte_kni.h |   18 +++
 2 files changed, 269 insertions(+), 51 deletions(-)

diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 76feef4..df55789 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -40,6 +40,7 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 
+#include <rte_spinlock.h>
 #include <rte_string_fns.h>
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
@@ -58,7 +59,7 @@
 
 #define KNI_REQUEST_MBUF_NUM_MAX      32
 
-#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
+#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0)
 
 /**
  * KNI context
@@ -66,6 +67,7 @@
 struct rte_kni {
 	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name */
 	uint16_t group_id;                  /**< Group ID of KNI devices */
+	unsigned slot_id;                   /**< KNI pool slot ID */
 	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
 	unsigned mbuf_size;                 /**< mbuf size */
 
@@ -88,10 +90,48 @@ enum kni_ops_status {
 	KNI_REQ_REGISTERED,
 };
 
+/**
+* KNI memzone pool slot
+*/
+struct rte_kni_memzone_slot{
+	unsigned id;
+	uint8_t in_use : 1;                    /**< slot in use */
+
+	//Memzones
+	const struct rte_memzone *m_ctx;       /**< KNI ctx */
+	const struct rte_memzone *m_tx_q;      /**< TX queue */
+	const struct rte_memzone *m_rx_q;      /**< RX queue */
+	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
+	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue */
+	const struct rte_memzone *m_req_q;     /**< Request queue */
+	const struct rte_memzone *m_resp_q;    /**< Response queue */
+	const struct rte_memzone *m_sync_addr;         
+	
+	/* Free linked list */
+	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
+};
+
+/**
+* KNI memzone pool
+*/
+struct rte_kni_memzone_pool{
+	uint8_t initialized : 1;            /**< Global KNI pool init flag */
+ 
+	unsigned max_ifaces;                /**< Max. num of KNI ifaces */
+	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
+	rte_spinlock_t mutex;               /**< alloc/relase mutex */
+
+	//Free memzone slots linked-list
+	struct rte_kni_memzone_slot *free;         /**< First empty slot */
+	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
+};
+
+
 static void kni_free_mbufs(struct rte_kni *kni);
 static void kni_allocate_mbufs(struct rte_kni *kni);
 
 static volatile int kni_fd = -1;
+static struct rte_kni_memzone_pool kni_memzone_pool = {0};
 
 static const struct rte_memzone *
 kni_memzone_reserve(const char *name, size_t len, int socket_id,
@@ -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t len, int socket_id,
 	return mz;
 }
 
+/* Pool mgmt */
+static struct rte_kni_memzone_slot*
+kni_memzone_pool_alloc(void)
+{
+	struct rte_kni_memzone_slot* slot;
+	
+	rte_spinlock_lock(&kni_memzone_pool.mutex);	
+
+	if(!kni_memzone_pool.free) {
+		rte_spinlock_unlock(&kni_memzone_pool.mutex);	
+		return NULL;
+	}
+
+	slot = kni_memzone_pool.free;
+	kni_memzone_pool.free = slot->next;
+
+	if(!kni_memzone_pool.free)
+		kni_memzone_pool.free_tail = NULL;
+
+	rte_spinlock_unlock(&kni_memzone_pool.mutex);
+
+	return slot;
+}
+
+static void 
+kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot)
+{
+	rte_spinlock_lock(&kni_memzone_pool.mutex);	
+
+	if(kni_memzone_pool.free)
+		kni_memzone_pool.free_tail->next = slot;
+	else
+		kni_memzone_pool.free = slot;
+
+	kni_memzone_pool.free_tail = slot;
+	slot->next = NULL;
+
+	rte_spinlock_unlock(&kni_memzone_pool.mutex);
+}
+
+
+/* Shall be called before any allocation happens */
+void
+rte_kni_init(unsigned int max_kni_ifaces)
+{
+	unsigned i;
+	struct rte_kni_memzone_slot* it;
+	const struct rte_memzone *mz;
+#define OBJNAMSIZ 32
+	char obj_name[OBJNAMSIZ];
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+
+	if(max_kni_ifaces == 0) {
+		//Panic
+		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
+							max_kni_ifaces);
+		rte_panic("Unable to initialize KNI\n");
+	}
+
+	//Allocate slot objects
+	kni_memzone_pool.slots = (struct rte_kni_memzone_slot*)rte_malloc(NULL,
+					sizeof(struct rte_kni_memzone_slot) *
+					max_kni_ifaces,
+					0);
+	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
+
+	//Initialize general stuff 
+	kni_memzone_pool.initialized = 1;
+	kni_memzone_pool.max_ifaces = max_kni_ifaces;
+	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
+
+	//Pre-allocate all memzones of the slots; panic on error
+	for(i=0; i<max_kni_ifaces; i++) {
+
+		//Recover current slot
+		it = &kni_memzone_pool.slots[i];
+		it->id = i;
+	
+		//Allocate KNI context
+		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
+		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
+					SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_ctx = mz;
+			
+		/* TX RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_tx_q = mz;
+
+		/* RX RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_rx_q = mz;
+
+		/* ALLOC RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_alloc_q = mz;
+
+		/* FREE RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_free_q = mz;
+
+		/* Request RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_req_q = mz;
+
+		/* Response RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_resp_q = mz;
+
+		/* Req/Resp sync mem area */
+		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, 
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_sync_addr = mz;
+
+		if(i+1 == max_kni_ifaces) {
+			it->next = NULL;
+			kni_memzone_pool.free_tail = it;
+		}else
+			it->next = &kni_memzone_pool.slots[i+1];
+	}
+	
+	return;
+
+kni_fail:
+	rte_panic("Unable to allocate memory for max_kni_ifaces:%d."
+		"increase the amount of hugepages memory\n", max_kni_ifaces);
+}
+
 /* It is deprecated and just for backward compatibility */
 struct rte_kni *
 rte_kni_create(uint8_t port_id,
@@ -140,14 +328,20 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	struct rte_kni_device_info dev_info;
 	struct rte_kni *ctx;
 	char intf_name[RTE_KNI_NAMESIZE];
-#define OBJNAMSIZ 32
-	char obj_name[OBJNAMSIZ];
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
+	struct rte_kni_memzone_slot* slot=NULL;
 
 	if (!pktmbuf_pool || !conf || !conf->name[0])
 		return NULL;
 
+	/* Check inited */
+	if (kni_memzone_pool.initialized != 1) {
+		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. "
+				"Invoke rte_kni_init() first\n");
+		return NULL;
+	}
+
 	/* Check FD and open once */
 	if (kni_fd < 0) {
 		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
@@ -158,16 +352,23 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 		}
 	}
 
+	/* Get an available slot from the pool */
+	slot = kni_memzone_pool_alloc();
+	if(!slot) {
+		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; "
+			"increase the number of max_kni_ifaces(current %d) or "
+			"deallocate unusued ones.\n", 
+			kni_memzone_pool.max_ifaces);
+		return NULL;
+	}
+	
+	//Recover ctx
+	ctx = slot->m_ctx->addr;
 	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
-	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", intf_name);
-	mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
-				SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
-	ctx = mz->addr;
 
 	if (ctx->in_use) {
 		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
-		goto fail;
+		return NULL;
 	}
 	memset(ctx, 0, sizeof(struct rte_kni));
 	if (ops)
@@ -190,83 +391,72 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
 		dev_info.bus, dev_info.devid, dev_info.function,
 			dev_info.vendor_id, dev_info.device_id);
-
 	/* TX RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_tx_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_tx_q;
 	ctx->tx_q = mz->addr;
 	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
 	dev_info.tx_phys = mz->phys_addr;
 
 	/* RX RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_rx_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_rx_q;
 	ctx->rx_q = mz->addr;
 	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
 	dev_info.rx_phys = mz->phys_addr;
 
 	/* ALLOC RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_alloc_q;
 	ctx->alloc_q = mz->addr;
 	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
 	dev_info.alloc_phys = mz->phys_addr;
 
 	/* FREE RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_free_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_free_q;
 	ctx->free_q = mz->addr;
 	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
 	dev_info.free_phys = mz->phys_addr;
 
 	/* Request RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_req_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_req_q;
 	ctx->req_q = mz->addr;
 	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
 	dev_info.req_phys = mz->phys_addr;
 
 	/* Response RING */
-	snprintf(obj_name, OBJNAMSIZ, "kni_resp_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_resp_q;
 	ctx->resp_q = mz->addr;
 	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
 	dev_info.resp_phys = mz->phys_addr;
 
 	/* Req/Resp sync mem area */
-	snprintf(obj_name, OBJNAMSIZ, "kni_sync_%s", intf_name);
-	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-	KNI_MZ_CHECK(mz == NULL);
+	mz = slot->m_sync_addr; 
 	ctx->sync_addr = mz->addr;
 	dev_info.sync_va = mz->addr;
 	dev_info.sync_phys = mz->phys_addr;
 
+
 	/* MBUF mempool */
 	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
 		pktmbuf_pool->name);
 	mz = rte_memzone_lookup(mz_name);
-	KNI_MZ_CHECK(mz == NULL);
+	KNI_MEM_CHECK(mz == NULL);
 	dev_info.mbuf_va = mz->addr;
 	dev_info.mbuf_phys = mz->phys_addr;
 	ctx->pktmbuf_pool = pktmbuf_pool;
 	ctx->group_id = conf->group_id;
+	ctx->slot_id = slot->id;
 	ctx->mbuf_size = conf->mbuf_size;
 
 	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
-	KNI_MZ_CHECK(ret < 0);
+	KNI_MEM_CHECK(ret < 0);
 
 	ctx->in_use = 1;
 
 	return ctx;
 
-fail:
-
+kni_fail:
+	if(slot)
+		kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot->id]);
+			
 	return NULL;
 }
 
@@ -287,6 +477,7 @@ int
 rte_kni_release(struct rte_kni *kni)
 {
 	struct rte_kni_device_info dev_info;
+	unsigned slot_id;
 
 	if (!kni || !kni->in_use)
 		return -1;
@@ -302,8 +493,19 @@ rte_kni_release(struct rte_kni *kni)
 	kni_free_fifo(kni->rx_q);
 	kni_free_fifo(kni->alloc_q);
 	kni_free_fifo(kni->free_q);
+
+	slot_id = kni->slot_id;		
+
+	//Memset
 	memset(kni, 0, sizeof(struct rte_kni));
 
+	//Release memzone
+	if(slot_id > kni_memzone_pool.max_ifaces) {
+		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n", 
+			slot_id, kni_memzone_pool.max_ifaces);
+	}
+	kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot_id]);
+
 	return 0;
 }
 
@@ -437,23 +639,21 @@ rte_kni_get_port_id(struct rte_kni *kni)
 struct rte_kni *
 rte_kni_get(const char *name)
 {
-	struct rte_kni *kni;
-	const struct rte_memzone *mz;
-	char mz_name[RTE_MEMZONE_NAMESIZE];
-
-	if (!name || !name[0])
-		return NULL;
-
-	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", name);
-	mz = rte_memzone_lookup(mz_name);
-	if (!mz)
-		return NULL;
-
-	kni = mz->addr;
-	if (!kni->in_use)
-		return NULL;
+	unsigned i;
+	struct rte_kni_memzone_slot* it;
+	struct rte_kni* kni;
+
+	//Could be improved perf-wise if necessary
+	for(i=0; i<kni_memzone_pool.max_ifaces; i++) {
+		it = &kni_memzone_pool.slots[i];
+		if(it->in_use == 0)
+			continue;
+		kni = it->m_ctx->addr;
+		if(strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
+			return kni;
+	}
 
-	return kni;
+	return NULL;
 }
 
 /*
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index 1a0b004..0159a1d 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -90,11 +90,27 @@ struct rte_kni_conf {
 };
 
 /**
+ * Initialize and preallocate KNI subsystem
+ *
+ * This function is to be executed on the MASTER lcore only, after EAL 
+ * initialization and before any KNI interface is attempted to be
+ * allocated
+ * 
+ * @param max_kni_ifaces 
+ *  The maximum number of KNI interfaces that can coexist concurrently
+ */
+extern void rte_kni_init(unsigned int max_kni_ifaces);
+
+
+/**
  * Allocate KNI interface according to the port id, mbuf size, mbuf pool,
  * configurations and callbacks for kernel requests.The KNI interface created
  * in the kernel space is the net interface the traditional Linux application
  * talking to.
  *
+ * The rte_kni_alloc shall not be called before rte_kni_init() has been
+ * called. rte_kni_alloc is thread safe. 
+ *
  * @param pktmbuf_pool
  *  The mempool for allocting mbufs for packets.
  * @param conf
@@ -138,6 +154,8 @@ extern struct rte_kni *rte_kni_create(uint8_t port_id,
  * Release KNI interface according to the context. It will also release the
  * paired KNI interface in kernel space. All processing on the specific KNI
  * context need to be stopped before calling this interface.
+ * 
+ * rte_kni_release is thread safe.
  *
  * @param kni
  *  The pointer to the context of an existent KNI interface.
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-09-29 10:15 [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release Marc Sune
@ 2014-10-09  6:01 ` Zhang, Helin
  2014-10-09  7:05   ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-09  6:01 UTC (permalink / raw)
  To: Marc Sune, dev

Hi Marc

Thanks for the idea on KNI! I have comments/questions as follows. Please correct me if I am wrong!

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Marc Sune
> Sent: Monday, September 29, 2014 6:16 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> This patch implements the KNI memzone pool in order to:
> 
> * prevent memzone exhaustion when allocating/deallocating KNI
>   interfaces.
What do you mean by "exhaustion"? Actually the memzones can be reused, though they cannot be resized.

> * be able to allocate KNI interfaces with the same name as
>   previously deallocated ones.
I think the current implementation can already allocate a KNI interface with the same name after it has been deallocated, as each memzone reservation will first try to find an existing memzone of the same name. If the name has already been used, that means it has already been allocated, so we can find it and reuse it, though the size cannot be changed.
Have you encountered the problem you are trying to solve? Have you tried to use current implementation of KNI for the case you are trying to fix for?
If I am not wrong, we may not need the changes, as the current implementation already supports the scenarios you are trying to support. The only limitation is that the sizes of memzones cannot be changed, but there seems to be no way around that.

> 
> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be called before
> any call to rte_kni_alloc() if KNI is used.
> 
> Signed-off-by: Marc Sune <marc.sune@bisdn.de>
> ---
>  lib/librte_kni/rte_kni.c |  302
> ++++++++++++++++++++++++++++++++++++++--------
>  lib/librte_kni/rte_kni.h |   18 +++
>  2 files changed, 269 insertions(+), 51 deletions(-)
> 
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index
> 76feef4..df55789 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -40,6 +40,7 @@
>  #include <unistd.h>
>  #include <sys/ioctl.h>
> 
> +#include <rte_spinlock.h>
>  #include <rte_string_fns.h>
>  #include <rte_ethdev.h>
>  #include <rte_malloc.h>
> @@ -58,7 +59,7 @@
> 
>  #define KNI_REQUEST_MBUF_NUM_MAX      32
> 
> -#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
> +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0)
> 
>  /**
>   * KNI context
> @@ -66,6 +67,7 @@
>  struct rte_kni {
>  	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name */
>  	uint16_t group_id;                  /**< Group ID of KNI devices */
> +	unsigned slot_id;                   /**< KNI pool slot ID */
>  	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
>  	unsigned mbuf_size;                 /**< mbuf size */
> 
> @@ -88,10 +90,48 @@ enum kni_ops_status {
>  	KNI_REQ_REGISTERED,
>  };
> 
> +/**
> +* KNI memzone pool slot
> +*/
> +struct rte_kni_memzone_slot{
> +	unsigned id;
> +	uint8_t in_use : 1;                    /**< slot in use */
> +
> +	//Memzones
> +	const struct rte_memzone *m_ctx;       /**< KNI ctx */
> +	const struct rte_memzone *m_tx_q;      /**< TX queue */
> +	const struct rte_memzone *m_rx_q;      /**< RX queue */
> +	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
> +	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue
> */
> +	const struct rte_memzone *m_req_q;     /**< Request queue */
> +	const struct rte_memzone *m_resp_q;    /**< Response queue */
> +	const struct rte_memzone *m_sync_addr;
> +
> +	/* Free linked list */
> +	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
> +};
> +
> +/**
> +* KNI memzone pool
> +*/
> +struct rte_kni_memzone_pool{
> +	uint8_t initialized : 1;            /**< Global KNI pool init flag */
> +
> +	unsigned max_ifaces;                /**< Max. num of KNI ifaces */
> +	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
> +	rte_spinlock_t mutex;               /**< alloc/relase mutex */
> +
> +	//Free memzone slots linked-list
> +	struct rte_kni_memzone_slot *free;         /**< First empty slot */
> +	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
> +};
> +
> +
>  static void kni_free_mbufs(struct rte_kni *kni);  static void
> kni_allocate_mbufs(struct rte_kni *kni);
> 
>  static volatile int kni_fd = -1;
> +static struct rte_kni_memzone_pool kni_memzone_pool = {0};
> 
>  static const struct rte_memzone *
>  kni_memzone_reserve(const char *name, size_t len, int socket_id, @@
> -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t len, int
> socket_id,
>  	return mz;
>  }
> 
> +/* Pool mgmt */
> +static struct rte_kni_memzone_slot*
> +kni_memzone_pool_alloc(void)
> +{
> +	struct rte_kni_memzone_slot* slot;
> +
> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
> +
> +	if(!kni_memzone_pool.free) {
> +		rte_spinlock_unlock(&kni_memzone_pool.mutex);
> +		return NULL;
> +	}
> +
> +	slot = kni_memzone_pool.free;
> +	kni_memzone_pool.free = slot->next;
> +
> +	if(!kni_memzone_pool.free)
> +		kni_memzone_pool.free_tail = NULL;
> +
> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
> +
> +	return slot;
> +}
> +
> +static void
> +kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot) {
> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
> +
> +	if(kni_memzone_pool.free)
> +		kni_memzone_pool.free_tail->next = slot;
> +	else
> +		kni_memzone_pool.free = slot;
> +
> +	kni_memzone_pool.free_tail = slot;
> +	slot->next = NULL;
> +
> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
> +}
> +
> +
> +/* Shall be called before any allocation happens */ void
> +rte_kni_init(unsigned int max_kni_ifaces) {
> +	unsigned i;
> +	struct rte_kni_memzone_slot* it;
> +	const struct rte_memzone *mz;
> +#define OBJNAMSIZ 32
> +	char obj_name[OBJNAMSIZ];
> +	char mz_name[RTE_MEMZONE_NAMESIZE];
> +
> +	if(max_kni_ifaces == 0) {
> +		//Panic
> +		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
> +							max_kni_ifaces);
> +		rte_panic("Unable to initialize KNI\n");
> +	}
> +
> +	//Allocate slot objects
> +	kni_memzone_pool.slots = (struct
> rte_kni_memzone_slot*)rte_malloc(NULL,
> +					sizeof(struct rte_kni_memzone_slot) *
> +					max_kni_ifaces,
> +					0);
> +	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
> +
> +	//Initialize general stuff
> +	kni_memzone_pool.initialized = 1;
> +	kni_memzone_pool.max_ifaces = max_kni_ifaces;
> +	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
> +
> +	//Pre-allocate all memzones of the slots; panic on error
> +	for(i=0; i<max_kni_ifaces; i++) {
> +
> +		//Recover current slot
> +		it = &kni_memzone_pool.slots[i];
> +		it->id = i;
> +
> +		//Allocate KNI context
> +		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
> +		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
> +					SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_ctx = mz;
> +
> +		/* TX RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_tx_q = mz;
> +
> +		/* RX RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_rx_q = mz;
> +
> +		/* ALLOC RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_alloc_q = mz;
> +
> +		/* FREE RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_free_q = mz;
> +
> +		/* Request RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_req_q = mz;
> +
> +		/* Response RING */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_resp_q = mz;
> +
> +		/* Req/Resp sync mem area */
> +		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> +							SOCKET_ID_ANY, 0);
> +		KNI_MEM_CHECK(mz == NULL);
> +		it->m_sync_addr = mz;
> +
> +		if(i+1 == max_kni_ifaces) {
> +			it->next = NULL;
> +			kni_memzone_pool.free_tail = it;
> +		}else
> +			it->next = &kni_memzone_pool.slots[i+1];
> +	}
> +
> +	return;
> +
> +kni_fail:
> +	rte_panic("Unable to allocate memory for max_kni_ifaces:%d."
> +		"increase the amount of hugepages memory\n", max_kni_ifaces); }
> +
>  /* It is deprecated and just for backward compatibility */  struct rte_kni *
> rte_kni_create(uint8_t port_id, @@ -140,14 +328,20 @@ rte_kni_alloc(struct
> rte_mempool *pktmbuf_pool,
>  	struct rte_kni_device_info dev_info;
>  	struct rte_kni *ctx;
>  	char intf_name[RTE_KNI_NAMESIZE];
> -#define OBJNAMSIZ 32
> -	char obj_name[OBJNAMSIZ];
>  	char mz_name[RTE_MEMZONE_NAMESIZE];
>  	const struct rte_memzone *mz;
> +	struct rte_kni_memzone_slot* slot=NULL;
> 
>  	if (!pktmbuf_pool || !conf || !conf->name[0])
>  		return NULL;
> 
> +	/* Check inited */
> +	if (kni_memzone_pool.initialized != 1) {
> +		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. "
> +				"Invoke rte_kni_init() first\n");
> +		return NULL;
> +	}
> +
>  	/* Check FD and open once */
>  	if (kni_fd < 0) {
>  		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); @@ -158,16 +352,23
> @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>  		}
>  	}
> 
> +	/* Get an available slot from the pool */
> +	slot = kni_memzone_pool_alloc();
> +	if(!slot) {
> +		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; "
> +			"increase the number of max_kni_ifaces(current %d) or "
> +			"deallocate unusued ones.\n",
> +			kni_memzone_pool.max_ifaces);
> +		return NULL;
> +	}
> +
> +	//Recover ctx
> +	ctx = slot->m_ctx->addr;
>  	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s",
> intf_name);
> -	mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
> -				SOCKET_ID_ANY, 0);
> -	KNI_MZ_CHECK(mz == NULL);
> -	ctx = mz->addr;
> 
>  	if (ctx->in_use) {
>  		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
> -		goto fail;
> +		return NULL;
>  	}
>  	memset(ctx, 0, sizeof(struct rte_kni));
>  	if (ops)
> @@ -190,83 +391,72 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>  	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
>  		dev_info.bus, dev_info.devid, dev_info.function,
>  			dev_info.vendor_id, dev_info.device_id);
> -
>  	/* TX RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_tx_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_tx_q;
>  	ctx->tx_q = mz->addr;
>  	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.tx_phys = mz->phys_addr;
> 
>  	/* RX RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_rx_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_rx_q;
>  	ctx->rx_q = mz->addr;
>  	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.rx_phys = mz->phys_addr;
> 
>  	/* ALLOC RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_alloc_q;
>  	ctx->alloc_q = mz->addr;
>  	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.alloc_phys = mz->phys_addr;
> 
>  	/* FREE RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_free_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_free_q;
>  	ctx->free_q = mz->addr;
>  	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.free_phys = mz->phys_addr;
> 
>  	/* Request RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_req_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_req_q;
>  	ctx->req_q = mz->addr;
>  	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.req_phys = mz->phys_addr;
> 
>  	/* Response RING */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_resp_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_resp_q;
>  	ctx->resp_q = mz->addr;
>  	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
>  	dev_info.resp_phys = mz->phys_addr;
> 
>  	/* Req/Resp sync mem area */
> -	snprintf(obj_name, OBJNAMSIZ, "kni_sync_%s", intf_name);
> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> 0);
> -	KNI_MZ_CHECK(mz == NULL);
> +	mz = slot->m_sync_addr;
>  	ctx->sync_addr = mz->addr;
>  	dev_info.sync_va = mz->addr;
>  	dev_info.sync_phys = mz->phys_addr;
> 
> +
>  	/* MBUF mempool */
>  	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
>  		pktmbuf_pool->name);
>  	mz = rte_memzone_lookup(mz_name);
> -	KNI_MZ_CHECK(mz == NULL);
> +	KNI_MEM_CHECK(mz == NULL);
>  	dev_info.mbuf_va = mz->addr;
>  	dev_info.mbuf_phys = mz->phys_addr;
>  	ctx->pktmbuf_pool = pktmbuf_pool;
>  	ctx->group_id = conf->group_id;
> +	ctx->slot_id = slot->id;
>  	ctx->mbuf_size = conf->mbuf_size;
> 
>  	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
> -	KNI_MZ_CHECK(ret < 0);
> +	KNI_MEM_CHECK(ret < 0);
> 
>  	ctx->in_use = 1;
> 
>  	return ctx;
> 
> -fail:
> -
> +kni_fail:
> +	if(slot)
> +		kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot->id]);
> +
>  	return NULL;
>  }
> 
> @@ -287,6 +477,7 @@ int
>  rte_kni_release(struct rte_kni *kni)
>  {
>  	struct rte_kni_device_info dev_info;
> +	unsigned slot_id;
> 
>  	if (!kni || !kni->in_use)
>  		return -1;
> @@ -302,8 +493,19 @@ rte_kni_release(struct rte_kni *kni)
>  	kni_free_fifo(kni->rx_q);
>  	kni_free_fifo(kni->alloc_q);
>  	kni_free_fifo(kni->free_q);
> +
> +	slot_id = kni->slot_id;
> +
> +	//Memset
>  	memset(kni, 0, sizeof(struct rte_kni));
> 
> +	//Release memzone
> +	if(slot_id > kni_memzone_pool.max_ifaces) {
> +		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
> +			slot_id, kni_memzone_pool.max_ifaces);
> +	}
> +	kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot_id]);
> +
>  	return 0;
>  }
> 
> @@ -437,23 +639,21 @@ rte_kni_get_port_id(struct rte_kni *kni)  struct
> rte_kni *  rte_kni_get(const char *name)  {
> -	struct rte_kni *kni;
> -	const struct rte_memzone *mz;
> -	char mz_name[RTE_MEMZONE_NAMESIZE];
> -
> -	if (!name || !name[0])
> -		return NULL;
> -
> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", name);
> -	mz = rte_memzone_lookup(mz_name);
> -	if (!mz)
> -		return NULL;
> -
> -	kni = mz->addr;
> -	if (!kni->in_use)
> -		return NULL;
> +	unsigned i;
> +	struct rte_kni_memzone_slot* it;
> +	struct rte_kni* kni;
> +
> +	//Could be improved perf-wise if necessary
> +	for(i=0; i<kni_memzone_pool.max_ifaces; i++) {
> +		it = &kni_memzone_pool.slots[i];
> +		if(it->in_use == 0)
> +			continue;
> +		kni = it->m_ctx->addr;
> +		if(strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
> +			return kni;
> +	}
> 
> -	return kni;
> +	return NULL;
>  }
> 
>  /*
> diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index
> 1a0b004..0159a1d 100644
> --- a/lib/librte_kni/rte_kni.h
> +++ b/lib/librte_kni/rte_kni.h
> @@ -90,11 +90,27 @@ struct rte_kni_conf {  };
> 
>  /**
> + * Initialize and preallocate KNI subsystem
> + *
> + * This function is to be executed on the MASTER lcore only, after EAL
> + * initialization and before any KNI interface is attempted to be
> + * allocated
> + *
> + * @param max_kni_ifaces
> + *  The maximum number of KNI interfaces that can coexist concurrently
> +*/ extern void rte_kni_init(unsigned int max_kni_ifaces);
> +
> +
> +/**
>   * Allocate KNI interface according to the port id, mbuf size, mbuf pool,
>   * configurations and callbacks for kernel requests.The KNI interface created
>   * in the kernel space is the net interface the traditional Linux application
>   * talking to.
>   *
> + * The rte_kni_alloc shall not be called before rte_kni_init() has been
> + * called. rte_kni_alloc is thread safe.
> + *
>   * @param pktmbuf_pool
>   *  The mempool for allocting mbufs for packets.
>   * @param conf
> @@ -138,6 +154,8 @@ extern struct rte_kni *rte_kni_create(uint8_t port_id,
>   * Release KNI interface according to the context. It will also release the
>   * paired KNI interface in kernel space. All processing on the specific KNI
>   * context need to be stopped before calling this interface.
> + *
> + * rte_kni_release is thread safe.
>   *
>   * @param kni
>   *  The pointer to the context of an existent KNI interface.
> --
> 1.7.10.4

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  6:01 ` Zhang, Helin
@ 2014-10-09  7:05   ` Marc Sune
  2014-10-09  7:32     ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-10-09  7:05 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

On 09/10/14 08:01, Zhang, Helin wrote:
> Hi Marc
>
> Thanks for the idea on KNI! I have comments/questions as follows. Please correct me if I am wrong!
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Marc Sune
>> Sent: Monday, September 29, 2014 6:16 PM
>> To: dev@dpdk.org
>> Subject: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
>>
>> This patch implements the KNI memzone pool in order to:
>>
>> * prevent memzone exhaustion when allocating/deallocating KNI
>>    interfaces.
> What do you mean the "exhaustion"? Actually the memzones can be reused, though they cannot be resized.

The exhaustion problem is due to the fact that the current 
implementation binds in the memzone names with a suffix of the name of 
the interface. This means if a DPDK application creates multiple 
interfaces (and destroys them) and creates some more (with different 
names) you quickly run out of memzones, because the ones allocated 
cannot be reused and are there until the end of the application lifetime.

This is precisely our problem, and the reason to create this memzone 
pool, so that once the ifaces are destroyed (rte_kni_release()), those 
memzones can be reused.
>> * be able to allocate KNI interfaces with the same name as
>>    previously deallocated ones.
> I think the current implementation can already allocate the same name KNI interface after being deallocated, as each time memzone reservation will try to find the same name memzone first. If the name has already been used, that means it has even been allocated, we can find it back and reuse it, though the size cannot be changed.
> Have you encountered the problem you are trying to solve? Have you tried to use current implementation of KNI for the case you are trying to fix for?
> If I am not wrong, we may not need the changes, as its current implementation already supports the scenarios you are trying to support. The only thing is that the sizes of memzones cannot be changed, but it seems no idea on that.

You are right. This comment should be deleted; I forgot to do it. We 
found the root cause of our problem after finishing this patch.

I can rework the comment and resend.

Marc

>
>> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be called before
>> any call to rte_kni_alloc() if KNI is used.
>>
>> Signed-off-by: Marc Sune <marc.sune@bisdn.de>
>> ---
>>   lib/librte_kni/rte_kni.c |  302
>> ++++++++++++++++++++++++++++++++++++++--------
>>   lib/librte_kni/rte_kni.h |   18 +++
>>   2 files changed, 269 insertions(+), 51 deletions(-)
>>
>> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index
>> 76feef4..df55789 100644
>> --- a/lib/librte_kni/rte_kni.c
>> +++ b/lib/librte_kni/rte_kni.c
>> @@ -40,6 +40,7 @@
>>   #include <unistd.h>
>>   #include <sys/ioctl.h>
>>
>> +#include <rte_spinlock.h>
>>   #include <rte_string_fns.h>
>>   #include <rte_ethdev.h>
>>   #include <rte_malloc.h>
>> @@ -58,7 +59,7 @@
>>
>>   #define KNI_REQUEST_MBUF_NUM_MAX      32
>>
>> -#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
>> +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0)
>>
>>   /**
>>    * KNI context
>> @@ -66,6 +67,7 @@
>>   struct rte_kni {
>>   	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name */
>>   	uint16_t group_id;                  /**< Group ID of KNI devices */
>> +	unsigned slot_id;                   /**< KNI pool slot ID */
>>   	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
>>   	unsigned mbuf_size;                 /**< mbuf size */
>>
>> @@ -88,10 +90,48 @@ enum kni_ops_status {
>>   	KNI_REQ_REGISTERED,
>>   };
>>
>> +/**
>> +* KNI memzone pool slot
>> +*/
>> +struct rte_kni_memzone_slot{
>> +	unsigned id;
>> +	uint8_t in_use : 1;                    /**< slot in use */
>> +
>> +	//Memzones
>> +	const struct rte_memzone *m_ctx;       /**< KNI ctx */
>> +	const struct rte_memzone *m_tx_q;      /**< TX queue */
>> +	const struct rte_memzone *m_rx_q;      /**< RX queue */
>> +	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
>> +	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue
>> */
>> +	const struct rte_memzone *m_req_q;     /**< Request queue */
>> +	const struct rte_memzone *m_resp_q;    /**< Response queue */
>> +	const struct rte_memzone *m_sync_addr;
>> +
>> +	/* Free linked list */
>> +	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
>> +};
>> +
>> +/**
>> +* KNI memzone pool
>> +*/
>> +struct rte_kni_memzone_pool{
>> +	uint8_t initialized : 1;            /**< Global KNI pool init flag */
>> +
>> +	unsigned max_ifaces;                /**< Max. num of KNI ifaces */
>> +	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
>> +	rte_spinlock_t mutex;               /**< alloc/relase mutex */
>> +
>> +	//Free memzone slots linked-list
>> +	struct rte_kni_memzone_slot *free;         /**< First empty slot */
>> +	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
>> +};
>> +
>> +
>>   static void kni_free_mbufs(struct rte_kni *kni);  static void
>> kni_allocate_mbufs(struct rte_kni *kni);
>>
>>   static volatile int kni_fd = -1;
>> +static struct rte_kni_memzone_pool kni_memzone_pool = {0};
>>
>>   static const struct rte_memzone *
>>   kni_memzone_reserve(const char *name, size_t len, int socket_id, @@
>> -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t len, int
>> socket_id,
>>   	return mz;
>>   }
>>
>> +/* Pool mgmt */
>> +static struct rte_kni_memzone_slot*
>> +kni_memzone_pool_alloc(void)
>> +{
>> +	struct rte_kni_memzone_slot* slot;
>> +
>> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
>> +
>> +	if(!kni_memzone_pool.free) {
>> +		rte_spinlock_unlock(&kni_memzone_pool.mutex);
>> +		return NULL;
>> +	}
>> +
>> +	slot = kni_memzone_pool.free;
>> +	kni_memzone_pool.free = slot->next;
>> +
>> +	if(!kni_memzone_pool.free)
>> +		kni_memzone_pool.free_tail = NULL;
>> +
>> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
>> +
>> +	return slot;
>> +}
>> +
>> +static void
>> +kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot) {
>> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
>> +
>> +	if(kni_memzone_pool.free)
>> +		kni_memzone_pool.free_tail->next = slot;
>> +	else
>> +		kni_memzone_pool.free = slot;
>> +
>> +	kni_memzone_pool.free_tail = slot;
>> +	slot->next = NULL;
>> +
>> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
>> +}
>> +
>> +
>> +/* Shall be called before any allocation happens */ void
>> +rte_kni_init(unsigned int max_kni_ifaces) {
>> +	unsigned i;
>> +	struct rte_kni_memzone_slot* it;
>> +	const struct rte_memzone *mz;
>> +#define OBJNAMSIZ 32
>> +	char obj_name[OBJNAMSIZ];
>> +	char mz_name[RTE_MEMZONE_NAMESIZE];
>> +
>> +	if(max_kni_ifaces == 0) {
>> +		//Panic
>> +		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
>> +							max_kni_ifaces);
>> +		rte_panic("Unable to initialize KNI\n");
>> +	}
>> +
>> +	//Allocate slot objects
>> +	kni_memzone_pool.slots = (struct
>> rte_kni_memzone_slot*)rte_malloc(NULL,
>> +					sizeof(struct rte_kni_memzone_slot) *
>> +					max_kni_ifaces,
>> +					0);
>> +	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
>> +
>> +	//Initialize general stuff
>> +	kni_memzone_pool.initialized = 1;
>> +	kni_memzone_pool.max_ifaces = max_kni_ifaces;
>> +	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
>> +
>> +	//Pre-allocate all memzones of the slots; panic on error
>> +	for(i=0; i<max_kni_ifaces; i++) {
>> +
>> +		//Recover current slot
>> +		it = &kni_memzone_pool.slots[i];
>> +		it->id = i;
>> +
>> +		//Allocate KNI context
>> +		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
>> +		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
>> +					SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_ctx = mz;
>> +
>> +		/* TX RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_tx_q = mz;
>> +
>> +		/* RX RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_rx_q = mz;
>> +
>> +		/* ALLOC RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_alloc_q = mz;
>> +
>> +		/* FREE RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_free_q = mz;
>> +
>> +		/* Request RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_req_q = mz;
>> +
>> +		/* Response RING */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_resp_q = mz;
>> +
>> +		/* Req/Resp sync mem area */
>> +		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
>> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
>> +							SOCKET_ID_ANY, 0);
>> +		KNI_MEM_CHECK(mz == NULL);
>> +		it->m_sync_addr = mz;
>> +
>> +		if(i+1 == max_kni_ifaces) {
>> +			it->next = NULL;
>> +			kni_memzone_pool.free_tail = it;
>> +		}else
>> +			it->next = &kni_memzone_pool.slots[i+1];
>> +	}
>> +
>> +	return;
>> +
>> +kni_fail:
>> +	rte_panic("Unable to allocate memory for max_kni_ifaces:%d."
>> +		"increase the amount of hugepages memory\n", max_kni_ifaces); }
>> +
>>   /* It is deprecated and just for backward compatibility */  struct rte_kni *
>> rte_kni_create(uint8_t port_id, @@ -140,14 +328,20 @@ rte_kni_alloc(struct
>> rte_mempool *pktmbuf_pool,
>>   	struct rte_kni_device_info dev_info;
>>   	struct rte_kni *ctx;
>>   	char intf_name[RTE_KNI_NAMESIZE];
>> -#define OBJNAMSIZ 32
>> -	char obj_name[OBJNAMSIZ];
>>   	char mz_name[RTE_MEMZONE_NAMESIZE];
>>   	const struct rte_memzone *mz;
>> +	struct rte_kni_memzone_slot* slot=NULL;
>>
>>   	if (!pktmbuf_pool || !conf || !conf->name[0])
>>   		return NULL;
>>
>> +	/* Check inited */
>> +	if (kni_memzone_pool.initialized != 1) {
>> +		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. "
>> +				"Invoke rte_kni_init() first\n");
>> +		return NULL;
>> +	}
>> +
>>   	/* Check FD and open once */
>>   	if (kni_fd < 0) {
>>   		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); @@ -158,16 +352,23
>> @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>>   		}
>>   	}
>>
>> +	/* Get an available slot from the pool */
>> +	slot = kni_memzone_pool_alloc();
>> +	if(!slot) {
>> +		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; "
>> +			"increase the number of max_kni_ifaces(current %d) or "
>> +			"deallocate unusued ones.\n",
>> +			kni_memzone_pool.max_ifaces);
>> +		return NULL;
>> +	}
>> +
>> +	//Recover ctx
>> +	ctx = slot->m_ctx->addr;
>>   	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
>> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s",
>> intf_name);
>> -	mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
>> -				SOCKET_ID_ANY, 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> -	ctx = mz->addr;
>>
>>   	if (ctx->in_use) {
>>   		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
>> -		goto fail;
>> +		return NULL;
>>   	}
>>   	memset(ctx, 0, sizeof(struct rte_kni));
>>   	if (ops)
>> @@ -190,83 +391,72 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>>   	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
>>   		dev_info.bus, dev_info.devid, dev_info.function,
>>   			dev_info.vendor_id, dev_info.device_id);
>> -
>>   	/* TX RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_tx_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_tx_q;
>>   	ctx->tx_q = mz->addr;
>>   	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.tx_phys = mz->phys_addr;
>>
>>   	/* RX RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_rx_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_rx_q;
>>   	ctx->rx_q = mz->addr;
>>   	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.rx_phys = mz->phys_addr;
>>
>>   	/* ALLOC RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_alloc_q;
>>   	ctx->alloc_q = mz->addr;
>>   	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.alloc_phys = mz->phys_addr;
>>
>>   	/* FREE RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_free_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_free_q;
>>   	ctx->free_q = mz->addr;
>>   	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.free_phys = mz->phys_addr;
>>
>>   	/* Request RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_req_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_req_q;
>>   	ctx->req_q = mz->addr;
>>   	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.req_phys = mz->phys_addr;
>>
>>   	/* Response RING */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_resp_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_resp_q;
>>   	ctx->resp_q = mz->addr;
>>   	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
>>   	dev_info.resp_phys = mz->phys_addr;
>>
>>   	/* Req/Resp sync mem area */
>> -	snprintf(obj_name, OBJNAMSIZ, "kni_sync_%s", intf_name);
>> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
>> 0);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	mz = slot->m_sync_addr;
>>   	ctx->sync_addr = mz->addr;
>>   	dev_info.sync_va = mz->addr;
>>   	dev_info.sync_phys = mz->phys_addr;
>>
>> +
>>   	/* MBUF mempool */
>>   	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
>>   		pktmbuf_pool->name);
>>   	mz = rte_memzone_lookup(mz_name);
>> -	KNI_MZ_CHECK(mz == NULL);
>> +	KNI_MEM_CHECK(mz == NULL);
>>   	dev_info.mbuf_va = mz->addr;
>>   	dev_info.mbuf_phys = mz->phys_addr;
>>   	ctx->pktmbuf_pool = pktmbuf_pool;
>>   	ctx->group_id = conf->group_id;
>> +	ctx->slot_id = slot->id;
>>   	ctx->mbuf_size = conf->mbuf_size;
>>
>>   	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
>> -	KNI_MZ_CHECK(ret < 0);
>> +	KNI_MEM_CHECK(ret < 0);
>>
>>   	ctx->in_use = 1;
>>
>>   	return ctx;
>>
>> -fail:
>> -
>> +kni_fail:
>> +	if(slot)
>> +		kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot->id]);
>> +
>>   	return NULL;
>>   }
>>
>> @@ -287,6 +477,7 @@ int
>>   rte_kni_release(struct rte_kni *kni)
>>   {
>>   	struct rte_kni_device_info dev_info;
>> +	unsigned slot_id;
>>
>>   	if (!kni || !kni->in_use)
>>   		return -1;
>> @@ -302,8 +493,19 @@ rte_kni_release(struct rte_kni *kni)
>>   	kni_free_fifo(kni->rx_q);
>>   	kni_free_fifo(kni->alloc_q);
>>   	kni_free_fifo(kni->free_q);
>> +
>> +	slot_id = kni->slot_id;
>> +
>> +	//Memset
>>   	memset(kni, 0, sizeof(struct rte_kni));
>>
>> +	//Release memzone
>> +	if(slot_id > kni_memzone_pool.max_ifaces) {
>> +		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
>> +			slot_id, kni_memzone_pool.max_ifaces);
>> +	}
>> +	kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot_id]);
>> +
>>   	return 0;
>>   }
>>
>> @@ -437,23 +639,21 @@ rte_kni_get_port_id(struct rte_kni *kni)  struct
>> rte_kni *  rte_kni_get(const char *name)  {
>> -	struct rte_kni *kni;
>> -	const struct rte_memzone *mz;
>> -	char mz_name[RTE_MEMZONE_NAMESIZE];
>> -
>> -	if (!name || !name[0])
>> -		return NULL;
>> -
>> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", name);
>> -	mz = rte_memzone_lookup(mz_name);
>> -	if (!mz)
>> -		return NULL;
>> -
>> -	kni = mz->addr;
>> -	if (!kni->in_use)
>> -		return NULL;
>> +	unsigned i;
>> +	struct rte_kni_memzone_slot* it;
>> +	struct rte_kni* kni;
>> +
>> +	//Could be improved perf-wise if necessary
>> +	for(i=0; i<kni_memzone_pool.max_ifaces; i++) {
>> +		it = &kni_memzone_pool.slots[i];
>> +		if(it->in_use == 0)
>> +			continue;
>> +		kni = it->m_ctx->addr;
>> +		if(strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
>> +			return kni;
>> +	}
>>
>> -	return kni;
>> +	return NULL;
>>   }
>>
>>   /*
>> diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index
>> 1a0b004..0159a1d 100644
>> --- a/lib/librte_kni/rte_kni.h
>> +++ b/lib/librte_kni/rte_kni.h
>> @@ -90,11 +90,27 @@ struct rte_kni_conf {  };
>>
>>   /**
>> + * Initialize and preallocate KNI subsystem
>> + *
>> + * This function is to be executed on the MASTER lcore only, after EAL
>> + * initialization and before any KNI interface is attempted to be
>> + * allocated
>> + *
>> + * @param max_kni_ifaces
>> + *  The maximum number of KNI interfaces that can coexist concurrently
>> +*/ extern void rte_kni_init(unsigned int max_kni_ifaces);
>> +
>> +
>> +/**
>>    * Allocate KNI interface according to the port id, mbuf size, mbuf pool,
>>    * configurations and callbacks for kernel requests.The KNI interface created
>>    * in the kernel space is the net interface the traditional Linux application
>>    * talking to.
>>    *
>> + * The rte_kni_alloc shall not be called before rte_kni_init() has been
>> + * called. rte_kni_alloc is thread safe.
>> + *
>>    * @param pktmbuf_pool
>>    *  The mempool for allocting mbufs for packets.
>>    * @param conf
>> @@ -138,6 +154,8 @@ extern struct rte_kni *rte_kni_create(uint8_t port_id,
>>    * Release KNI interface according to the context. It will also release the
>>    * paired KNI interface in kernel space. All processing on the specific KNI
>>    * context need to be stopped before calling this interface.
>> + *
>> + * rte_kni_release is thread safe.
>>    *
>>    * @param kni
>>    *  The pointer to the context of an existent KNI interface.
>> --
>> 1.7.10.4
> Regards,
> Helin
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  7:05   ` Marc Sune
@ 2014-10-09  7:32     ` Zhang, Helin
  2014-10-09  7:52       ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-09  7:32 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

Hi Marc

Good explanation! Thank you very much! I have added more comments on your code changes.
One general comment on annotations: in DPDK, we do not use "//" to start comments.

> -----Original Message-----
> From: Marc Sune [mailto:marc.sune@bisdn.de]
> Sent: Thursday, October 9, 2014 3:05 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> Hi Helin,
> 
> On 09/10/14 08:01, Zhang, Helin wrote:
> > Hi Marc
> >
> > Thanks for the idea on KNI! I have comments/questions as follows. Please
> correct me if I am wrong!
> >
> >> -----Original Message-----
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Marc Sune
> >> Sent: Monday, September 29, 2014 6:16 PM
> >> To: dev@dpdk.org
> >> Subject: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI
> >> alloc/release
> >>
> >> This patch implements the KNI memzone pool in order to:
> >>
> >> * prevent memzone exhaustion when allocating/deallocating KNI
> >>    interfaces.
> > What do you mean the "exhaustion"? Actually the memzones can be reused,
> though they cannot be resized.
> 
> The exhaustion problem is due to the fact that the current implementation
> binds in the memzone names with a suffix of the name of the interface. This
> means if a DPDK application creates multiple interfaces (and destroys them)
> and creates some more (with different
> names) you quickly run out of memzones, because the ones allocated cannot be
> reused and are there until the end of the application lifetime.
> 
> This is precisely our problem, and the reason to create this memzone pool, so
> that once the ifaces are destroyes (rte_kni_release()), those memzones can be
> reused.
Understood! Thanks!

> >> * be able to allocate KNI interfaces with the same name as
> >>    previously deallocated ones.
> > I think the current implementation can already allocate the same name KNI
> interface after being deallocated, as each time memzone reservation will try to
> find the same name memzone first. If the name has already been used, that
> means it has even been allocated, we can find it back and reuse it, though the
> size cannot be changed.
> > Have you encountered the problem you are trying to solve? Have you tried to
> use current implementation of KNI for the case you are trying to fix for?
> > If I am not wrong, we may not need the changes, as its current
> implementation already supports the scenarios you are trying to support. The
> only thing is that the sizes of memzones cannot be changed, but it seems no
> idea on that.
> 
> You are right. This comment should be deleted, I forgot to do it. We found the
> root cause of this after finishing this patch in our problem.
> 
> I can rework the comment and resend.
> 
> Marc
> 
> >
> >> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be
> >> called before any call to rte_kni_alloc() if KNI is used.
To avoid the additional interface, this initialization work can be done the first
time rte_kni_alloc() is called; please refer to how it opens kni_fd ("/dev/kni").
Also, I think some de-initialization work should be done in rte_kni_close().

> >>
> >> Signed-off-by: Marc Sune <marc.sune@bisdn.de>
> >> ---
> >>   lib/librte_kni/rte_kni.c |  302
> >> ++++++++++++++++++++++++++++++++++++++--------
> >>   lib/librte_kni/rte_kni.h |   18 +++
> >>   2 files changed, 269 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> >> index
> >> 76feef4..df55789 100644
> >> --- a/lib/librte_kni/rte_kni.c
> >> +++ b/lib/librte_kni/rte_kni.c
> >> @@ -40,6 +40,7 @@
> >>   #include <unistd.h>
> >>   #include <sys/ioctl.h>
> >>
> >> +#include <rte_spinlock.h>
> >>   #include <rte_string_fns.h>
> >>   #include <rte_ethdev.h>
> >>   #include <rte_malloc.h>
> >> @@ -58,7 +59,7 @@
> >>
> >>   #define KNI_REQUEST_MBUF_NUM_MAX      32
> >>
> >> -#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
> >> +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while
> >> +(0)
> >>
> >>   /**
> >>    * KNI context
> >> @@ -66,6 +67,7 @@
> >>   struct rte_kni {
> >>   	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name
> */
> >>   	uint16_t group_id;                  /**< Group ID of KNI devices
> */
> >> +	unsigned slot_id;                   /**< KNI pool slot ID */
It would be better to use uint16_t or similar, as that's DPDK style.

> >>   	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
> >>   	unsigned mbuf_size;                 /**< mbuf size */
> >>
> >> @@ -88,10 +90,48 @@ enum kni_ops_status {
> >>   	KNI_REQ_REGISTERED,
> >>   };
> >>
> >> +/**
> >> +* KNI memzone pool slot
> >> +*/
> >> +struct rte_kni_memzone_slot{
> >> +	unsigned id;
> >> +	uint8_t in_use : 1;                    /**< slot in use */
> >> +
> >> +	//Memzones
The comment style here is not DPDK style; please try to use the DPDK style as elsewhere.

> >> +	const struct rte_memzone *m_ctx;       /**< KNI ctx */
> >> +	const struct rte_memzone *m_tx_q;      /**< TX queue */
> >> +	const struct rte_memzone *m_rx_q;      /**< RX queue */
> >> +	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
> >> +	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue
> >> */
> >> +	const struct rte_memzone *m_req_q;     /**< Request queue */
> >> +	const struct rte_memzone *m_resp_q;    /**< Response queue */
> >> +	const struct rte_memzone *m_sync_addr;
> >> +
> >> +	/* Free linked list */
> >> +	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
For the linked list management, "TAILQ_" might be a better choice. Please check
whether it can be used here.

> >> +};
> >> +
> >> +/**
> >> +* KNI memzone pool
> >> +*/
> >> +struct rte_kni_memzone_pool{
> >> +	uint8_t initialized : 1;            /**< Global KNI pool init flag */
> >> +
> >> +	unsigned max_ifaces;                /**< Max. num of KNI ifaces */
> >> +	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
> >> +	rte_spinlock_t mutex;               /**< alloc/relase mutex */
> >> +
> >> +	//Free memzone slots linked-list
> >> +	struct rte_kni_memzone_slot *free;         /**< First empty slot */
> >> +	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
> >> +};
> >> +
> >> +
> >>   static void kni_free_mbufs(struct rte_kni *kni);  static void
> >> kni_allocate_mbufs(struct rte_kni *kni);
> >>
> >>   static volatile int kni_fd = -1;
> >> +static struct rte_kni_memzone_pool kni_memzone_pool = {0};
> >>
> >>   static const struct rte_memzone *
> >>   kni_memzone_reserve(const char *name, size_t len, int socket_id, @@
> >> -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t len,
> >> int socket_id,
> >>   	return mz;
> >>   }
> >>
> >> +/* Pool mgmt */
> >> +static struct rte_kni_memzone_slot*
> >> +kni_memzone_pool_alloc(void)
> >> +{
> >> +	struct rte_kni_memzone_slot* slot;
> >> +
> >> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
> >> +
> >> +	if(!kni_memzone_pool.free) {
> >> +		rte_spinlock_unlock(&kni_memzone_pool.mutex);
> >> +		return NULL;
> >> +	}
> >> +
> >> +	slot = kni_memzone_pool.free;
> >> +	kni_memzone_pool.free = slot->next;
> >> +
> >> +	if(!kni_memzone_pool.free)
> >> +		kni_memzone_pool.free_tail = NULL;
> >> +
> >> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
> >> +
> >> +	return slot;
> >> +}
> >> +
> >> +static void
> >> +kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot) {
Generally we don't use "dealloc"-like names; how about "release"? Just want to
get it to be similar to the existing code.

> >> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
> >> +
> >> +	if(kni_memzone_pool.free)
> >> +		kni_memzone_pool.free_tail->next = slot;
> >> +	else
> >> +		kni_memzone_pool.free = slot;
> >> +
> >> +	kni_memzone_pool.free_tail = slot;
> >> +	slot->next = NULL;
> >> +
> >> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
> >> +}
> >> +
> >> +
> >> +/* Shall be called before any allocation happens */ void
> >> +rte_kni_init(unsigned int max_kni_ifaces) {
> >> +	unsigned i;
> >> +	struct rte_kni_memzone_slot* it;
> >> +	const struct rte_memzone *mz;
> >> +#define OBJNAMSIZ 32
> >> +	char obj_name[OBJNAMSIZ];
> >> +	char mz_name[RTE_MEMZONE_NAMESIZE];
> >> +
> >> +	if(max_kni_ifaces == 0) {
> >> +		//Panic
> >> +		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
> >> +							max_kni_ifaces);
> >> +		rte_panic("Unable to initialize KNI\n");
> >> +	}
> >> +
> >> +	//Allocate slot objects
> >> +	kni_memzone_pool.slots = (struct
> >> rte_kni_memzone_slot*)rte_malloc(NULL,
> >> +					sizeof(struct rte_kni_memzone_slot) *
> >> +					max_kni_ifaces,
> >> +					0);
> >> +	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
> >> +
> >> +	//Initialize general stuff
> >> +	kni_memzone_pool.initialized = 1;
> >> +	kni_memzone_pool.max_ifaces = max_kni_ifaces;
> >> +	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
> >> +
> >> +	//Pre-allocate all memzones of the slots; panic on error
> >> +	for(i=0; i<max_kni_ifaces; i++) {
> >> +
> >> +		//Recover current slot
> >> +		it = &kni_memzone_pool.slots[i];
> >> +		it->id = i;
> >> +
> >> +		//Allocate KNI context
> >> +		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
> >> +		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
> >> +					SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_ctx = mz;
> >> +
> >> +		/* TX RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_tx_q = mz;
> >> +
> >> +		/* RX RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_rx_q = mz;
> >> +
> >> +		/* ALLOC RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_alloc_q = mz;
> >> +
> >> +		/* FREE RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_free_q = mz;
> >> +
> >> +		/* Request RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_req_q = mz;
> >> +
> >> +		/* Response RING */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_resp_q = mz;
> >> +
> >> +		/* Req/Resp sync mem area */
> >> +		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
> >> +		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
> >> +							SOCKET_ID_ANY, 0);
> >> +		KNI_MEM_CHECK(mz == NULL);
> >> +		it->m_sync_addr = mz;
> >> +
> >> +		if(i+1 == max_kni_ifaces) {
> >> +			it->next = NULL;
> >> +			kni_memzone_pool.free_tail = it;
> >> +		}else
> >> +			it->next = &kni_memzone_pool.slots[i+1];
> >> +	}
> >> +
> >> +	return;
> >> +
> >> +kni_fail:
> >> +	rte_panic("Unable to allocate memory for max_kni_ifaces:%d."
> >> +		"increase the amount of hugepages memory\n", max_kni_ifaces); }
> >> +
> >>   /* It is deprecated and just for backward compatibility */  struct
> >> rte_kni * rte_kni_create(uint8_t port_id, @@ -140,14 +328,20 @@
> >> rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
> >>   	struct rte_kni_device_info dev_info;
> >>   	struct rte_kni *ctx;
> >>   	char intf_name[RTE_KNI_NAMESIZE];
> >> -#define OBJNAMSIZ 32
> >> -	char obj_name[OBJNAMSIZ];
> >>   	char mz_name[RTE_MEMZONE_NAMESIZE];
> >>   	const struct rte_memzone *mz;
> >> +	struct rte_kni_memzone_slot* slot=NULL;
> >>
> >>   	if (!pktmbuf_pool || !conf || !conf->name[0])
> >>   		return NULL;
> >>
> >> +	/* Check inited */
> >> +	if (kni_memzone_pool.initialized != 1) {
> >> +		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. "
> >> +				"Invoke rte_kni_init() first\n");
> >> +		return NULL;
> >> +	}
> >> +
> >>   	/* Check FD and open once */
> >>   	if (kni_fd < 0) {
> >>   		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); @@ -158,16
> +352,23 @@
> >> rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
> >>   		}
> >>   	}
> >>
> >> +	/* Get an available slot from the pool */
> >> +	slot = kni_memzone_pool_alloc();
> >> +	if(!slot) {
> >> +		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; "
> >> +			"increase the number of max_kni_ifaces(current %d) or "
> >> +			"deallocate unusued ones.\n",
> >> +			kni_memzone_pool.max_ifaces);
> >> +		return NULL;
> >> +	}
> >> +
> >> +	//Recover ctx
> >> +	ctx = slot->m_ctx->addr;
> >>   	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
> >> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s",
> >> intf_name);
> >> -	mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
> >> -				SOCKET_ID_ANY, 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> -	ctx = mz->addr;
> >>
> >>   	if (ctx->in_use) {
> >>   		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
> >> -		goto fail;
> >> +		return NULL;
> >>   	}
> >>   	memset(ctx, 0, sizeof(struct rte_kni));
> >>   	if (ops)
> >> @@ -190,83 +391,72 @@ rte_kni_alloc(struct rte_mempool
> *pktmbuf_pool,
> >>   	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
> >>   		dev_info.bus, dev_info.devid, dev_info.function,
> >>   			dev_info.vendor_id, dev_info.device_id);
> >> -
> >>   	/* TX RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_tx_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_tx_q;
> >>   	ctx->tx_q = mz->addr;
> >>   	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.tx_phys = mz->phys_addr;
> >>
> >>   	/* RX RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_rx_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_rx_q;
> >>   	ctx->rx_q = mz->addr;
> >>   	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.rx_phys = mz->phys_addr;
> >>
> >>   	/* ALLOC RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_alloc_q;
> >>   	ctx->alloc_q = mz->addr;
> >>   	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.alloc_phys = mz->phys_addr;
> >>
> >>   	/* FREE RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_free_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_free_q;
> >>   	ctx->free_q = mz->addr;
> >>   	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.free_phys = mz->phys_addr;
> >>
> >>   	/* Request RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_req_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_req_q;
> >>   	ctx->req_q = mz->addr;
> >>   	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.req_phys = mz->phys_addr;
> >>
> >>   	/* Response RING */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_resp_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_resp_q;
> >>   	ctx->resp_q = mz->addr;
> >>   	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
> >>   	dev_info.resp_phys = mz->phys_addr;
> >>
> >>   	/* Req/Resp sync mem area */
> >> -	snprintf(obj_name, OBJNAMSIZ, "kni_sync_%s", intf_name);
> >> -	mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY,
> >> 0);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	mz = slot->m_sync_addr;
> >>   	ctx->sync_addr = mz->addr;
> >>   	dev_info.sync_va = mz->addr;
> >>   	dev_info.sync_phys = mz->phys_addr;
> >>
> >> +
> >>   	/* MBUF mempool */
> >>   	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
> >>   		pktmbuf_pool->name);
> >>   	mz = rte_memzone_lookup(mz_name);
> >> -	KNI_MZ_CHECK(mz == NULL);
> >> +	KNI_MEM_CHECK(mz == NULL);
> >>   	dev_info.mbuf_va = mz->addr;
> >>   	dev_info.mbuf_phys = mz->phys_addr;
> >>   	ctx->pktmbuf_pool = pktmbuf_pool;
> >>   	ctx->group_id = conf->group_id;
> >> +	ctx->slot_id = slot->id;
> >>   	ctx->mbuf_size = conf->mbuf_size;
> >>
> >>   	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
> >> -	KNI_MZ_CHECK(ret < 0);
> >> +	KNI_MEM_CHECK(ret < 0);
> >>
> >>   	ctx->in_use = 1;
> >>
> >>   	return ctx;
> >>
> >> -fail:
> >> -
> >> +kni_fail:
> >> +	if(slot)
> >> +		kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot->id]);
> >> +
> >>   	return NULL;
> >>   }
> >>
> >> @@ -287,6 +477,7 @@ int
> >>   rte_kni_release(struct rte_kni *kni)
> >>   {
> >>   	struct rte_kni_device_info dev_info;
> >> +	unsigned slot_id;
> >>
> >>   	if (!kni || !kni->in_use)
> >>   		return -1;
> >> @@ -302,8 +493,19 @@ rte_kni_release(struct rte_kni *kni)
> >>   	kni_free_fifo(kni->rx_q);
> >>   	kni_free_fifo(kni->alloc_q);
> >>   	kni_free_fifo(kni->free_q);
> >> +
> >> +	slot_id = kni->slot_id;
> >> +
> >> +	//Memset
> >>   	memset(kni, 0, sizeof(struct rte_kni));
> >>
> >> +	//Release memzone
> >> +	if(slot_id > kni_memzone_pool.max_ifaces) {
> >> +		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
> >> +			slot_id, kni_memzone_pool.max_ifaces);
> >> +	}
> >> +	kni_memzone_pool_dealloc(&kni_memzone_pool.slots[slot_id]);
> >> +
> >>   	return 0;
> >>   }
> >>
> >> @@ -437,23 +639,21 @@ rte_kni_get_port_id(struct rte_kni *kni)
> >> struct rte_kni *  rte_kni_get(const char *name)  {
> >> -	struct rte_kni *kni;
> >> -	const struct rte_memzone *mz;
> >> -	char mz_name[RTE_MEMZONE_NAMESIZE];
> >> -
> >> -	if (!name || !name[0])
> >> -		return NULL;
> >> -
> >> -	snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", name);
> >> -	mz = rte_memzone_lookup(mz_name);
> >> -	if (!mz)
> >> -		return NULL;
> >> -
> >> -	kni = mz->addr;
> >> -	if (!kni->in_use)
> >> -		return NULL;
> >> +	unsigned i;
> >> +	struct rte_kni_memzone_slot* it;
> >> +	struct rte_kni* kni;
> >> +
> >> +	//Could be improved perf-wise if necessary
> >> +	for(i=0; i<kni_memzone_pool.max_ifaces; i++) {
> >> +		it = &kni_memzone_pool.slots[i];
> >> +		if(it->in_use == 0)
> >> +			continue;
> >> +		kni = it->m_ctx->addr;
> >> +		if(strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
> >> +			return kni;
> >> +	}
> >>
> >> -	return kni;
> >> +	return NULL;
> >>   }
> >>
> >>   /*
> >> diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
> >> index 1a0b004..0159a1d 100644
> >> --- a/lib/librte_kni/rte_kni.h
> >> +++ b/lib/librte_kni/rte_kni.h
> >> @@ -90,11 +90,27 @@ struct rte_kni_conf {  };
> >>
> >>   /**
> >> + * Initialize and preallocate KNI subsystem
> >> + *
> >> + * This function is to be executed on the MASTER lcore only, after
> >> +EAL
> >> + * initialization and before any KNI interface is attempted to be
> >> + * allocated
> >> + *
> >> + * @param max_kni_ifaces
> >> + *  The maximum number of KNI interfaces that can coexist
> >> +concurrently */ extern void rte_kni_init(unsigned int
> >> +max_kni_ifaces);
> >> +
> >> +
> >> +/**
> >>    * Allocate KNI interface according to the port id, mbuf size, mbuf pool,
> >>    * configurations and callbacks for kernel requests.The KNI interface
> created
> >>    * in the kernel space is the net interface the traditional Linux application
> >>    * talking to.
> >>    *
> >> + * The rte_kni_alloc shall not be called before rte_kni_init() has
> >> + been
> >> + * called. rte_kni_alloc is thread safe.
> >> + *
> >>    * @param pktmbuf_pool
> >>    *  The mempool for allocting mbufs for packets.
> >>    * @param conf
> >> @@ -138,6 +154,8 @@ extern struct rte_kni *rte_kni_create(uint8_t
> port_id,
> >>    * Release KNI interface according to the context. It will also release the
> >>    * paired KNI interface in kernel space. All processing on the specific KNI
> >>    * context need to be stopped before calling this interface.
> >> + *
> >> + * rte_kni_release is thread safe.
> >>    *
> >>    * @param kni
> >>    *  The pointer to the context of an existent KNI interface.
> >> --
> >> 1.7.10.4
> > Regards,
> > Helin
> >

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  7:32     ` Zhang, Helin
@ 2014-10-09  7:52       ` Marc Sune
  2014-10-09  8:33         ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-10-09  7:52 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

Thanks for the comments. Inline and  snipped

On 09/10/14 09:32, Zhang, Helin wrote:
> Hi Marc
>
> Good explanation! Thank you very much! I add more comments for your code changes.
> One common comment for annotation, in DPDK, we do not use "//" to start annotation.

OK, "//" => "/* */"

> [snip]
>>>> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be
>>>> called before any call to rte_kni_alloc() if KNI is used.
> To avoid the additional interface, this initialization works can be done during the
> first time of calling rte_kni_alloc(), please refer to how it opens kni_fd ("/dev/kni").
> Also I think there should be some de-initialization works should be done in rte_kni_close().
How is rte_kni_alloc() supposed to know the size of the pool that has to 
be pre-allocated (max_kni_ifaces)?

I don't think the approach of pre-allocating on the first 
rte_kni_alloc() would work (I already discarded this approach before 
implementing the patch), because this would imply we need a define of 
#define MAX_KNI_IFACES during compilation time of DPDK, and the 
pre-allocation is highly dependent on the amount of hugepages memory you 
have and the usage of the KNI interfaces the applications wants to do. 
We can easily end up with DPDK users having to tweak the default 
MAX_KNI_IFACES before compiling DPDK every time, which is definitely not 
desirable IMHO.

For rte_kni_close(), the pool is static (incl. the slot struct), and the 
memzones cannot be unreserved, hence there is nothing AFAIU to 
de-initialize; what do you mean specifically?
>
>>>> Signed-off-by: Marc Sune <marc.sune@bisdn.de>
>>>> ---
>>>>    lib/librte_kni/rte_kni.c |  302
>>>> ++++++++++++++++++++++++++++++++++++++--------
>>>>    lib/librte_kni/rte_kni.h |   18 +++
>>>>    2 files changed, 269 insertions(+), 51 deletions(-)
>>>>
>>>> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
>>>> index
>>>> 76feef4..df55789 100644
>>>> --- a/lib/librte_kni/rte_kni.c
>>>> +++ b/lib/librte_kni/rte_kni.c
>>>> @@ -40,6 +40,7 @@
>>>>    #include <unistd.h>
>>>>    #include <sys/ioctl.h>
>>>>
>>>> +#include <rte_spinlock.h>
>>>>    #include <rte_string_fns.h>
>>>>    #include <rte_ethdev.h>
>>>>    #include <rte_malloc.h>
>>>> @@ -58,7 +59,7 @@
>>>>
>>>>    #define KNI_REQUEST_MBUF_NUM_MAX      32
>>>>
>>>> -#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
>>>> +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while
>>>> +(0)
>>>>
>>>>    /**
>>>>     * KNI context
>>>> @@ -66,6 +67,7 @@
>>>>    struct rte_kni {
>>>>    	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name
>> */
>>>>    	uint16_t group_id;                  /**< Group ID of KNI devices
>> */
>>>> +	unsigned slot_id;                   /**< KNI pool slot ID */
> It would be better to use uint16_t or similar, as that's DPDK style.
I usually use uint16_t (or unsigned int, not unsigned) but the original 
source code (rte_kni.c I think, but I could have looked up something 
else) was already using unsigned, so I tried to mimic that. Please 
clarify which is the standard!!
>
>>>>    	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
>>>>    	unsigned mbuf_size;                 /**< mbuf size */
>>>>
>>>> @@ -88,10 +90,48 @@ enum kni_ops_status {
>>>>    	KNI_REQ_REGISTERED,
>>>>    };
>>>>
>>>> +/**
>>>> +* KNI memzone pool slot
>>>> +*/
>>>> +struct rte_kni_memzone_slot{
>>>> +	unsigned id;
>>>> +	uint8_t in_use : 1;                    /**< slot in use */
>>>> +
>>>> +	//Memzones
> The comments style is not DPDK style, please try to use DPDK style as others.

I will use in v2:

/*
*
*/

  and /* */ for inline.
>
>>>> +	const struct rte_memzone *m_ctx;       /**< KNI ctx */
>>>> +	const struct rte_memzone *m_tx_q;      /**< TX queue */
>>>> +	const struct rte_memzone *m_rx_q;      /**< RX queue */
>>>> +	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
>>>> +	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue
>>>> */
>>>> +	const struct rte_memzone *m_req_q;     /**< Request queue */
>>>> +	const struct rte_memzone *m_resp_q;    /**< Response queue */
>>>> +	const struct rte_memzone *m_sync_addr;
>>>> +
>>>> +	/* Free linked list */
>>>> +	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
> For the linked list management, "TAILQ_" might be a better choice. Please check
> if it can be usable here.

I will check it for patch v2

>
>>>> +};
>>>> +
>>>> +/**
>>>> +* KNI memzone pool
>>>> +*/
>>>> +struct rte_kni_memzone_pool{
>>>> +	uint8_t initialized : 1;            /**< Global KNI pool init flag */
>>>> +
>>>> +	unsigned max_ifaces;                /**< Max. num of KNI ifaces */
>>>> +	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
>>>> +	rte_spinlock_t mutex;               /**< alloc/relase mutex */
>>>> +
>>>> +	//Free memzone slots linked-list
>>>> +	struct rte_kni_memzone_slot *free;         /**< First empty slot */
>>>> +	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
>>>> +};
>>>> +
>>>> +
>>>>    static void kni_free_mbufs(struct rte_kni *kni);  static void
>>>> kni_allocate_mbufs(struct rte_kni *kni);
>>>>
>>>>    static volatile int kni_fd = -1;
>>>> +static struct rte_kni_memzone_pool kni_memzone_pool = {0};
>>>>
>>>>    static const struct rte_memzone *
>>>>    kni_memzone_reserve(const char *name, size_t len, int socket_id, @@
>>>> -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t len,
>>>> int socket_id,
>>>>    	return mz;
>>>>    }
>>>>
>>>> +/* Pool mgmt */
>>>> +static struct rte_kni_memzone_slot*
>>>> +kni_memzone_pool_alloc(void)
>>>> +{
>>>> +	struct rte_kni_memzone_slot* slot;
>>>> +
>>>> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
>>>> +
>>>> +	if(!kni_memzone_pool.free) {
>>>> +		rte_spinlock_unlock(&kni_memzone_pool.mutex);
>>>> +		return NULL;
>>>> +	}
>>>> +
>>>> +	slot = kni_memzone_pool.free;
>>>> +	kni_memzone_pool.free = slot->next;
>>>> +
>>>> +	if(!kni_memzone_pool.free)
>>>> +		kni_memzone_pool.free_tail = NULL;
>>>> +
>>>> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
>>>> +
>>>> +	return slot;
>>>> +}
>>>> +
>>>> +static void
>>>> +kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot) {
> Generally we don't use "dealloc" like, how about "release"? Just want to
> get it be similar to the existing code.

I have no problem on changing that. v2


Thanks and regards

> [snip]
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  7:52       ` Marc Sune
@ 2014-10-09  8:33         ` Zhang, Helin
  2014-10-09  8:45           ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-09  8:33 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

Hi Marc

Thanks for your clarification! I added more comments.

> -----Original Message-----
> From: Marc Sune [mailto:marc.sune@bisdn.de]
> Sent: Thursday, October 9, 2014 3:53 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> Hi Helin,
> 
> Thanks for the comments. Inline and  snipped
> 
> On 09/10/14 09:32, Zhang, Helin wrote:
> > Hi Marc
> >
> > Good explanation! Thank you very much! I add more comments for your code
> changes.
> > One common comment for annotation, in DPDK, we do not use "//" to start
> annotation.
> 
> OK, "//" => "/* */"
> 
> > [snip]
> >>>> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be
> >>>> called before any call to rte_kni_alloc() if KNI is used.
> > To avoid the additional interface, this initialization works can be
> > done during the first time of calling rte_kni_alloc(), please refer to how it
> opens kni_fd ("/dev/kni").
> > Also I think there should be some de-initialization works should be done in
> rte_kni_close().
> How is rte_kni_alloc() supposed to know the size of the pool that has to be
> pre-allocated (max_kni_ifaces)?
Add it into 'struct rte_kni_conf', also a default one might be needed if 0 is
configured by the user app.

> 
> I don't think the approach of pre-allocating on the first
> rte_kni_alloc() would work (I already discarded this approach before
> implementing the patch), because this would imply we need a define of #define
> MAX_KNI_IFACES during compilation time of DPDK, and the pre-allocation is
> highly dependent on the amount of hugepages memory you have and the usage
> of the KNI interfaces the applications wants to do.
> We can easily end up with DPDK users having to tweak the default
> MAX_KNI_IFACES before compiling DPDK every time, which is definetely not
> desirable IMHO.
Your idea is good! My point is: is it possible to avoid adding a new
interface, so that no changes are needed in the user app?

> 
> For rte_kni_close(), the pool is static (incl. the slot struct), and the memzones
> cannot be unreserved, hence there is nothing AFAIU to de-initialize; what do
> you mean specifically?
You can see that rte_kni_close() will be called in XEN (#ifdef RTE_LIBRTE_XEN_DOM0),
XEN support is different from standard Linux support.

> >
> >>>> Signed-off-by: Marc Sune <marc.sune@bisdn.de>
> >>>> ---
> >>>>    lib/librte_kni/rte_kni.c |  302
> >>>> ++++++++++++++++++++++++++++++++++++++--------
> >>>>    lib/librte_kni/rte_kni.h |   18 +++
> >>>>    2 files changed, 269 insertions(+), 51 deletions(-)
> >>>>
> >>>> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> >>>> index
> >>>> 76feef4..df55789 100644
> >>>> --- a/lib/librte_kni/rte_kni.c
> >>>> +++ b/lib/librte_kni/rte_kni.c
> >>>> @@ -40,6 +40,7 @@
> >>>>    #include <unistd.h>
> >>>>    #include <sys/ioctl.h>
> >>>>
> >>>> +#include <rte_spinlock.h>
> >>>>    #include <rte_string_fns.h>
> >>>>    #include <rte_ethdev.h>
> >>>>    #include <rte_malloc.h>
> >>>> @@ -58,7 +59,7 @@
> >>>>
> >>>>    #define KNI_REQUEST_MBUF_NUM_MAX      32
> >>>>
> >>>> -#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
> >>>> +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while
> >>>> +(0)
> >>>>
> >>>>    /**
> >>>>     * KNI context
> >>>> @@ -66,6 +67,7 @@
> >>>>    struct rte_kni {
> >>>>    	char name[RTE_KNI_NAMESIZE];        /**< KNI interface
> name
> >> */
> >>>>    	uint16_t group_id;                  /**< Group ID of KNI
> devices
> >> */
> >>>> +	unsigned slot_id;                   /**< KNI pool slot ID */
> > It would be better to use uint16_t or similar, as that's DPDK style.
> I usually use uint16_t (or unsigned int, not unsinged) but the original source
> code (rte_kni.c I think, but I could have looked up something
> else) was using already unsigned, so I tried to mimic that. Please clarify which
> is the standard!!
I can see most of code lines are using uint16_t/uint32_t or similar, but some may
use others which is possibly required by kernel space. KNI will transfer a struct
from user space to kernel space.
My observation is that DPDK tries to use uint16_t like types as most as possible,
though no rule for that I guess.

> >
> >>>>    	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool
> */
> >>>>    	unsigned mbuf_size;                 /**< mbuf size */
> >>>>
> >>>> @@ -88,10 +90,48 @@ enum kni_ops_status {
> >>>>    	KNI_REQ_REGISTERED,
> >>>>    };
> >>>>
> >>>> +/**
> >>>> +* KNI memzone pool slot
> >>>> +*/
> >>>> +struct rte_kni_memzone_slot{
> >>>> +	unsigned id;
> >>>> +	uint8_t in_use : 1;                    /**< slot in use */
> >>>> +
> >>>> +	//Memzones
> > The comments style is not DPDK style, please try to use DPDK style as others.
> 
> I will use in v2:
> 
> /*
> *
> */
> 
>   and /* */ for inline.
> >
> >>>> +	const struct rte_memzone *m_ctx;       /**< KNI ctx */
> >>>> +	const struct rte_memzone *m_tx_q;      /**< TX queue */
> >>>> +	const struct rte_memzone *m_rx_q;      /**< RX queue */
> >>>> +	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs
> queue */
> >>>> +	const struct rte_memzone *m_free_q;    /**< To be freed mbufs
> queue
> >>>> */
> >>>> +	const struct rte_memzone *m_req_q;     /**< Request queue */
> >>>> +	const struct rte_memzone *m_resp_q;    /**< Response queue */
> >>>> +	const struct rte_memzone *m_sync_addr;
> >>>> +
> >>>> +	/* Free linked list */
> >>>> +	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
> > For the linked list management, "TAILQ_" might be a better choice.
> > Please check if it can be usable here.
> 
> I will check it for patch v2
> 
> >
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> +* KNI memzone pool
> >>>> +*/
> >>>> +struct rte_kni_memzone_pool{
> >>>> +	uint8_t initialized : 1;            /**< Global KNI pool init flag */
> >>>> +
> >>>> +	unsigned max_ifaces;                /**< Max. num of KNI ifaces
> */
> >>>> +	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
> >>>> +	rte_spinlock_t mutex;               /**< alloc/relase mutex */
> >>>> +
> >>>> +	//Free memzone slots linked-list
> >>>> +	struct rte_kni_memzone_slot *free;         /**< First empty slot
> */
> >>>> +	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
> >>>> +};
> >>>> +
> >>>> +
> >>>>    static void kni_free_mbufs(struct rte_kni *kni);  static void
> >>>> kni_allocate_mbufs(struct rte_kni *kni);
> >>>>
> >>>>    static volatile int kni_fd = -1;
> >>>> +static struct rte_kni_memzone_pool kni_memzone_pool = {0};
> >>>>
> >>>>    static const struct rte_memzone *
> >>>>    kni_memzone_reserve(const char *name, size_t len, int socket_id,
> >>>> @@
> >>>> -105,6 +145,154 @@ kni_memzone_reserve(const char *name, size_t
> >>>> len, int socket_id,
> >>>>    	return mz;
> >>>>    }
> >>>>
> >>>> +/* Pool mgmt */
> >>>> +static struct rte_kni_memzone_slot*
> >>>> +kni_memzone_pool_alloc(void)
> >>>> +{
> >>>> +	struct rte_kni_memzone_slot* slot;
> >>>> +
> >>>> +	rte_spinlock_lock(&kni_memzone_pool.mutex);
> >>>> +
> >>>> +	if(!kni_memzone_pool.free) {
> >>>> +		rte_spinlock_unlock(&kni_memzone_pool.mutex);
> >>>> +		return NULL;
> >>>> +	}
> >>>> +
> >>>> +	slot = kni_memzone_pool.free;
> >>>> +	kni_memzone_pool.free = slot->next;
> >>>> +
> >>>> +	if(!kni_memzone_pool.free)
> >>>> +		kni_memzone_pool.free_tail = NULL;
> >>>> +
> >>>> +	rte_spinlock_unlock(&kni_memzone_pool.mutex);
> >>>> +
> >>>> +	return slot;
> >>>> +}
> >>>> +
> >>>> +static void
> >>>> +kni_memzone_pool_dealloc(struct rte_kni_memzone_slot* slot) {
> > Generally we don't use "dealloc" like, how about "release"? Just want
> > to get it be similar to the existing code.
> 
> I have no problem on changing that. v2
> 
> 
> Thanks and regards
> 
> > [snip]
> >

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  8:33         ` Zhang, Helin
@ 2014-10-09  8:45           ` Marc Sune
  2014-10-09  8:57             ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-10-09  8:45 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

Inline and snipped. Thanks for the additional comments.

On 09/10/14 10:33, Zhang, Helin wrote:
> [snip]
>>> [snip]
>>>>>> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall be
>>>>>> called before any call to rte_kni_alloc() if KNI is used.
>>> To avoid the additional interface, this initialization works can be
>>> done during the first time of calling rte_kni_alloc(), please refer to how it
>> opens kni_fd ("/dev/kni").
>>> Also I think there should be some de-initialization works should be done in
>> rte_kni_close().
>> How is rte_kni_alloc() supposed to know the size of the pool that has to be
>> pre-allocated (max_kni_ifaces)?
> Add it into 'struct rte_kni_conf', also a default one might be needed if 0 is
> configured by the user app.

I disagree with this approach :) . struct rte_kni_conf is a 
per-interface configuration struct, and the mempool is shared between 
all the alloc/release of the KNI interfaces.

I don't like the approach to mix one-time-use (first alloc) parameters 
that affect the entire KNI system into the struct rte_kni_conf.

>> I don't think the approach of pre-allocating on the first
>> rte_kni_alloc() would work (I already discarded this approach before
>> implementing the patch), because this would imply we need a define of #define
>> MAX_KNI_IFACES during compilation time of DPDK, and the pre-allocation is
>> highly dependent on the amount of hugepages memory you have and the usage
>> of the KNI interfaces the applications wants to do.
>> We can easily end up with DPDK users having to tweak the default
>> MAX_KNI_IFACES before compiling DPDK every time, which is definetely not
>> desirable IMHO.
> Your idea is good! My point is it possible to avoid adding new interface, then no
> changes are needed in user app.

I see the current approach the most clean and comprehensive (from the 
perspective of the user of the library) approach. Do you have any other 
proposal? I am open to discuss and eventually implement it if it turns 
out to be better.

>
>> For rte_kni_close(), the pool is static (incl. the slot struct), and the memzones
>> cannot be unreserved, hence there is nothing AFAIU to de-initialize; what do
>> you mean specifically?
> You can see that rte_kni_close() will be called in XEN (#ifdef RTE_LIBRTE_XEN_DOM0),
> XEN support is different from standard Linux support.

OK it is called, but what is the (extra) state that I should 
de-initialize that is coming from this patch? I cannot see any state 
I've added that I have to de-initialize here.

Many thanks
Marc

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  8:45           ` Marc Sune
@ 2014-10-09  8:57             ` Zhang, Helin
  2014-10-09 10:15               ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-09  8:57 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

Hi Marc

Ohh, I made a stupid proposal of adding a field per port. Sorry! I still have more comments inlined.

> -----Original Message-----
> From: Marc Sune [mailto:marc.sune@bisdn.de]
> Sent: Thursday, October 9, 2014 4:45 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> Hi Helin,
> 
> Inline and snipped. Thanks for the additional comments.
> 
> On 09/10/14 10:33, Zhang, Helin wrote:
> > [snip]
> >>> [snip]
> >>>>>> It adds a new API call, rte_kni_init(max_kni_ifaces) that shall
> >>>>>> be called before any call to rte_kni_alloc() if KNI is used.
> >>> To avoid the additional interface, this initialization works can be
> >>> done during the first time of calling rte_kni_alloc(), please refer
> >>> to how it
> >> opens kni_fd ("/dev/kni").
> >>> Also I think there should be some de-initialization works should be
> >>> done in
> >> rte_kni_close().
> >> How is rte_kni_alloc() supposed to know the size of the pool that has
> >> to be pre-allocated (max_kni_ifaces)?
> > Add it into 'struct rte_kni_conf', also a default one might be needed
> > if 0 is configured by the user app.
> 
> I disagree with this approach :) . struct rte_kni_conf is a per-interface
> configuration struct, and the mempool is shared between all the alloc/release
> of the KNI interfaces.
> 
> I don't like the approach to mix one-time-use (first alloc) parameters that affect
> the entire KNI system into the struct rte_kni_conf.
I agree with you, this approach is a bit stupid.

> 
> >> I don't think the approach of pre-allocating on the first
> >> rte_kni_alloc() would work (I already discarded this approach before
> >> implementing the patch), because this would imply we need a define of
> >> #define MAX_KNI_IFACES during compilation time of DPDK, and the
> >> pre-allocation is highly dependent on the amount of hugepages memory
> >> you have and the usage of the KNI interfaces the applications wants to do.
> >> We can easily end up with DPDK users having to tweak the default
> >> MAX_KNI_IFACES before compiling DPDK every time, which is definetely
> >> not desirable IMHO.
> > Your idea is good! My point is it possible to avoid adding new
> > interface, then no changes are needed in user app.
> 
> I see the current approach the most clean and comprehensive (from the
> perspective of the user of the library) approach. Do you have any other
> proposal? I am open to discuss and eventually implement it if it turns out to be
> better.
How about adding a new compile-time config item in the config files? I still think we should avoid adding more interfaces if possible. :)
> 
> >
> >> For rte_kni_close(), the pool is static (incl. the slot struct), and
> >> the memzones cannot be unreserved, hence there is nothing AFAIU to
> >> de-initialize; what do you mean specifically?
> > You can see that rte_kni_close() will be called in XEN (#ifdef
> > RTE_LIBRTE_XEN_DOM0), XEN support is different from standard Linux
> support.
> 
> OK it is called, but what is the (extra) state that I should de-initialize that is
> coming from this patch? I cannot see any state I've added I have to de-initialize
> here.
Just suggest you think about that. maybe nothing needs to be added there. :)

> 
> Many thanks
> Marc

Thanks!

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09  8:57             ` Zhang, Helin
@ 2014-10-09 10:15               ` Marc Sune
  2014-10-10  5:25                 ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-10-09 10:15 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

inline and snipped. Let me know after reading this mail if you believe I 
can already submit the v2 with the changes you suggested.

On 09/10/14 10:57, Zhang, Helin wrote:
> [snip]
>>>> I don't think the approach of pre-allocating on the first
>>>> rte_kni_alloc() would work (I already discarded this approach before
>>>> implementing the patch), because this would imply we need a define of
>>>> #define MAX_KNI_IFACES during compilation time of DPDK, and the
>>>> pre-allocation is highly dependent on the amount of hugepages memory
>>>> you have and the usage of the KNI interfaces the applications wants to do.
>>>> We can easily end up with DPDK users having to tweak the default
>>>> MAX_KNI_IFACES before compiling DPDK every time, which is definetely
>>>> not desirable IMHO.
>>> Your idea is good! My point is it possible to avoid adding new
>>> interface, then no changes are needed in user app.
>> I see the current approach the most clean and comprehensive (from the
>> perspective of the user of the library) approach. Do you have any other
>> proposal? I am open to discuss and eventually implement it if it turns out to be
>> better.
> How about add a new compile config item in config files? I still think we should avoid adding more interfaces if possible. :)

In my original answer to your comment here cited starting by "I don't 
think the approach of pre-allocating on the first rte_kni_alloc()..." I 
explain why I think this is not a good idea.

A config.h #define approach would be highly dependent on the hugepages 
memory size and on the usage the application wants to make of KNI 
interfaces. Especially due to the former, I don't think it is a good 
idea. DPDK doesn't force any user to edit config.h manually AFAIK, 
unless you want to do some performance optimizations or debug. And I 
think that is a good approach, one I would like to keep and not break 
with this patch.

Any parameter that depends on the DPDK application — and is therefore really
out of the scope of DPDK itself (as the size of the pool parameter is) — is 
handled via an API call. So I see rte_kni_init() as the natural way to 
do so, especially given that the rte_kni_close() API call already exists.

>>>> For rte_kni_close(), the pool is static (incl. the slot struct), and
>>>> the memzones cannot be unreserved, hence there is nothing AFAIU to
>>>> de-initialize; what do you mean specifically?
>>> You can see that rte_kni_close() will be called in XEN (#ifdef
>>> RTE_LIBRTE_XEN_DOM0), XEN support is different from standard Linux
>> support.
>>
>> OK it is called, but what is the (extra) state that I should de-initialize that is
>> coming from this patch? I cannot see any state I've added I have to de-initialize
>> here.
> Just suggest you think about that. maybe nothing needs to be added there. :)

I will definitely double-check before submitting v2.

Thanks for the suggestions
Marc

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-09 10:15               ` Marc Sune
@ 2014-10-10  5:25                 ` Zhang, Helin
  2014-10-10  6:37                   ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-10  5:25 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

Hi Marc

More comments added.

> -----Original Message-----
> From: Marc Sune [mailto:marc.sune@bisdn.de]
> Sent: Thursday, October 9, 2014 6:16 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> Hi Helin,
> 
> inline and snipped. Let me know after reading this mail if you believe I can
> already submit the v2 with the changes you suggested.
> 
> On 09/10/14 10:57, Zhang, Helin wrote:
> > [snip]
> >>>> I don't think the approach of pre-allocating on the first
> >>>> rte_kni_alloc() would work (I already discarded this approach
> >>>> before implementing the patch), because this would imply we need a
> >>>> define of #define MAX_KNI_IFACES during compilation time of DPDK,
> >>>> and the pre-allocation is highly dependent on the amount of
> >>>> hugepages memory you have and the usage of the KNI interfaces the
> applications wants to do.
> >>>> We can easily end up with DPDK users having to tweak the default
> >>>> MAX_KNI_IFACES before compiling DPDK every time, which is
> >>>> definetely not desirable IMHO.
> >>> Your idea is good! My point is it possible to avoid adding new
> >>> interface, then no changes are needed in user app.
> >> I see the current approach the most clean and comprehensive (from the
> >> perspective of the user of the library) approach. Do you have any
> >> other proposal? I am open to discuss and eventually implement it if
> >> it turns out to be better.
> > How about add a new compile config item in config files? I still think
> > we should avoid adding more interfaces if possible. :)
> 
> In my original answer to your comment here cited starting by "I don't think the
> approach of pre-allocating on the first rte_kni_alloc()..." I explain why I think
> this is not a good idea.
I understood your concern. It is not bad of adding a config item in config files
(config/config_linux), as it already has a lot of compile time configurations in them.
For a specific platform, the maximum number of KNI interfaces should be fixed,
and no need to be changed frequently.

> 
> A config.g #define approach would be highly dependent on hugepages memory
> size and the usage the applications wants to do with KNI interfaces. Specially
> due to the former, I don't think it is a good idea. DPDK doesn't force any user to
> edit manually the config.h AFAIK, unless you want to do some performance
> optimizations or debug. And I think it is a good approach and I would like to
> keep it and not break it with this patch
No need to edit config.h, just modify config/config_linux or config/config_bsd.

> 
> Any parameter that depends on DPDK applications so far, so really out of the
> scope of DPDK itself (like the size of the pool parameter is), is handled via an
> API call. So I see rte_kni_init() as the natural way to do so, specially by the fact
> that rte_kni_close() API call already exists.
I agree that your solution is good, I am just thinking if we can make less changes
for API level.

> 
> >>>> For rte_kni_close(), the pool is static (incl. the slot struct),
> >>>> and the memzones cannot be unreserved, hence there is nothing AFAIU
> >>>> to de-initialize; what do you mean specifically?
> >>> You can see that rte_kni_close() will be called in XEN (#ifdef
> >>> RTE_LIBRTE_XEN_DOM0), XEN support is different from standard Linux
> >> support.
> >>
> >> OK it is called, but what is the (extra) state that I should
> >> de-initialize that is coming from this patch? I cannot see any state
> >> I've added I have to de-initialize here.
> > Just suggest you think about that. maybe nothing needs to be added
> > there. :)
> 
> I will definitely double-check before submitting v2.
> 
> Thanks for the suggestions
> Marc

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-10  5:25                 ` Zhang, Helin
@ 2014-10-10  6:37                   ` Marc Sune
  2014-10-10  7:35                     ` Zhang, Helin
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Sune @ 2014-10-10  6:37 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Hi Helin,

On 10/10/14 07:25, Zhang, Helin wrote:
> Hi Marc
>
> More comments added.
>
>> [snip]
>>>>>> We can easily end up with DPDK users having to tweak the default
>>>>>> MAX_KNI_IFACES before compiling DPDK every time, which is
>>>>>> definetely not desirable IMHO.
>>>>> Your idea is good! My point is it possible to avoid adding new
>>>>> interface, then no changes are needed in user app.
>>>> I see the current approach the most clean and comprehensive (from the
>>>> perspective of the user of the library) approach. Do you have any
>>>> other proposal? I am open to discuss and eventually implement it if
>>>> it turns out to be better.
>>> How about add a new compile config item in config files? I still think
>>> we should avoid adding more interfaces if possible. :)
>> In my original answer to your comment here cited starting by "I don't think the
>> approach of pre-allocating on the first rte_kni_alloc()..." I explain why I think
>> this is not a good idea.
> I understood your concern. It is not bad of adding a config item in config files
> (config/config_linux), as it already has a lot of compile time configurations in them.
> For a specific platform, the maximum number of KNI interfaces should be fixed,
> and no need to be changed frequently.
rte_kni_init() should be staying. Actually the asymmetry of the API 
nowadays (no rte_kni_init, because fd is created during first alloc but 
an rte_kni_close) looks weird to me. Just an aside question, not related 
to this patch, why was the KNI fd not closed in the last rte_kni_release 
to be consistent?
>
>> A config.g #define approach would be highly dependent on hugepages memory
>> size and the usage the applications wants to do with KNI interfaces. Specially
>> due to the former, I don't think it is a good idea. DPDK doesn't force any user to
>> edit manually the config.h AFAIK, unless you want to do some performance
>> optimizations or debug. And I think it is a good approach and I would like to
>> keep it and not break it with this patch
> No need to edit config.h, just modify config/config_linux or config/config_bsd.
This is what I meant, all the config_*.h
>> Any parameter that depends on DPDK applications so far, so really out of the
>> scope of DPDK itself (like the size of the pool parameter is), is handled via an
>> API call. So I see rte_kni_init() as the natural way to do so, specially by the fact
>> that rte_kni_close() API call already exists.
> I agree that your solution is good, I am just thinking if we can make less changes
> for API level.

I can understand the reluctance for adding new API calls, but, let me 
double check, as I am not sure you understood my point:

If we set it in the config_*.h, and we set MAX_NUM_OF_KNI to a value 
whatever, 8, 16... 128..., it is quite possible that a lot of users of 
DPDK that will use the KNI (only those) get run-time errors since the 
memzones cannot be pre-allocated (out of memory). Memzones are 
preallocated at rte_kni_init() (or at first alloc, as per what you are 
suggesting). Moreover, the user would have to go and change (e.g. 
reduce) the MAX_NUM_OF_KNI in the config_*.h and recompile DPDK. I don't 
think that's what we want.

Marc

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-10  6:37                   ` Marc Sune
@ 2014-10-10  7:35                     ` Zhang, Helin
  2014-10-10  9:02                       ` Marc Sune
  0 siblings, 1 reply; 13+ messages in thread
From: Zhang, Helin @ 2014-10-10  7:35 UTC (permalink / raw)
  To: Marc Sune; +Cc: dev

Hi Marc

> -----Original Message-----
> From: Marc Sune [mailto:marc.sune@bisdn.de]
> Sent: Friday, October 10, 2014 2:38 PM
> To: Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
> 
> Hi Helin,
> 
> On 10/10/14 07:25, Zhang, Helin wrote:
> > Hi Marc
> >
> > More comments added.
> >
> >> [snip]
> >>>>>> We can easily end up with DPDK users having to tweak the default
> >>>>>> MAX_KNI_IFACES before compiling DPDK every time, which is
> >>>>>> definetely not desirable IMHO.
> >>>>> Your idea is good! My point is it possible to avoid adding new
> >>>>> interface, then no changes are needed in user app.
> >>>> I see the current approach the most clean and comprehensive (from
> >>>> the perspective of the user of the library) approach. Do you have
> >>>> any other proposal? I am open to discuss and eventually implement
> >>>> it if it turns out to be better.
> >>> How about add a new compile config item in config files? I still
> >>> think we should avoid adding more interfaces if possible. :)
> >> In my original answer to your comment here cited starting by "I don't
> >> think the approach of pre-allocating on the first rte_kni_alloc()..."
> >> I explain why I think this is not a good idea.
> > I understood your concern. It is not bad of adding a config item in
> > config files (config/config_linux), as it already has a lot of compile time
> configurations in them.
> > For a specific platform, the maximum number of KNI interfaces should
> > be fixed, and no need to be changed frequently.
> rte_kni_init() should be staying. Actually the asymmetry of the API nowadays
> (no rte_kni_init, because fd is created during first alloc but an rte_kni_close)
> looks weird to me. Just an aside question, not related to this patch, why was
> the KNI fd not closed in the last rte_kni_release to be consistent?
Initially there was no rte_kni_close() which was later added for supporting XEN
specifically.
If KNI needs an init function, it might need to be put in rte_eal_init() like other
modules do.

> >
> >> A config.g #define approach would be highly dependent on hugepages
> >> memory size and the usage the applications wants to do with KNI
> >> interfaces. Specially due to the former, I don't think it is a good
> >> idea. DPDK doesn't force any user to edit manually the config.h
> >> AFAIK, unless you want to do some performance optimizations or debug.
> >> And I think it is a good approach and I would like to keep it and not
> >> break it with this patch
> > No need to edit config.h, just modify config/config_linux or config/config_bsd.
> This is what I meant, all the config_*.h
> >> Any parameter that depends on DPDK applications so far, so really out
> >> of the scope of DPDK itself (like the size of the pool parameter is),
> >> is handled via an API call. So I see rte_kni_init() as the natural
> >> way to do so, specially by the fact that rte_kni_close() API call already exists.
> > I agree that your solution is good, I am just thinking if we can make
> > less changes for API level.
> 
> I can understand the reluctance for adding new API calls, but, let me double
> check, as I am not sure you understood my point:
> 
> If we set it in the config_*.h, and we set MAX_NUM_OF_KNI to a value
> whatever, 8, 16... 128..., it is quite possible that a lot of users of DPDK that will
> use the KNI (only those) get run-time errors since the memzones cannot be
> pre-allocated (out of memory). Memzones are preallocated at rte_kni_init() (or
> at first alloc, as per what you are suggesting). Moreover, the user would have
> to go and change (e.g.
> reduce) the MAX_NUM_OF_KNI in the config_*.h and recompile DPDK. I don't
> think that's what we want.
Actually the end user doesn't know the mapping between hugepage memory size
and the maximum number of KNI. Right? Giving 1G memory, end user doesn't know
how many KNI interfaces it can support. So the maximum number of KNI interfaces
still needs to be tuned.
I think the default number can be tuned to a reasonable small to support most of
cases, e.g. 4 or 8. And I don't think that number needs to be changed at runtime.

> 
> Marc

Regards,
Helin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release
  2014-10-10  7:35                     ` Zhang, Helin
@ 2014-10-10  9:02                       ` Marc Sune
  0 siblings, 0 replies; 13+ messages in thread
From: Marc Sune @ 2014-10-10  9:02 UTC (permalink / raw)
  To: Zhang, Helin; +Cc: dev

Helin

On 10/10/14 09:35, Zhang, Helin wrote:
> Hi Marc
>
>> [snip]
>> rte_kni_init() should be staying. Actually the asymmetry of the API nowadays
>> (no rte_kni_init, because fd is created during first alloc but an rte_kni_close)
>> looks weird to me. Just an aside question, not related to this patch, why was
>> the KNI fd not closed in the last rte_kni_release to be consistent?
> Initially there was no rte_kni_close() which was later added for supporting XEN
> specifically.
> If KNI needs an init function, it might need to be put in rte_eal_init() like other
> modules do.

Well, calling rte_kni_init() always allocates memzones that if the 
user(=DPDK user, not the user of the final application) doesn't need to 
use KNI is wasted memory.

I would not put it there, unless we add a flag in rte_eal_init() to 
enable/disable KNI subsystem and tune the pre-allocation of 
max_num_ifaces in runtime, without the necessity to recompile DPDK. More 
below.

>
>>>> A config.g #define approach would be highly dependent on hugepages
>>>> memory size and the usage the applications wants to do with KNI
>>>> interfaces. Specially due to the former, I don't think it is a good
>>>> idea. DPDK doesn't force any user to edit manually the config.h
>>>> AFAIK, unless you want to do some performance optimizations or debug.
>>>> And I think it is a good approach and I would like to keep it and not
>>>> break it with this patch
>>> No need to edit config.h, just modify config/config_linux or config/config_bsd.
>> This is what I meant, all the config_*.h
>>>> Any parameter that depends on DPDK applications so far, so really out
>>>> of the scope of DPDK itself (like the size of the pool parameter is),
>>>> is handled via an API call. So I see rte_kni_init() as the natural
>>>> way to do so, specially by the fact that rte_kni_close() API call already exists.
>>> I agree that your solution is good, I am just thinking if we can make
>>> less changes for API level.
>> I can understand the reluctance for adding new API calls, but, let me double
>> check, as I am not sure you understood my point:
>>
>> If we set it in the config_*.h, and we set MAX_NUM_OF_KNI to a value
>> whatever, 8, 16... 128..., it is quite possible that a lot of users of DPDK that will
>> use the KNI (only those) get run-time errors since the memzones cannot be
>> pre-allocated (out of memory). Memzones are preallocated at rte_kni_init() (or
>> at first alloc, as per what you are suggesting). Moreover, the user would have
>> to go and change (e.g.
>> reduce) the MAX_NUM_OF_KNI in the config_*.h and recompile DPDK. I don't
>> think that's what we want.
> Actually the end user doesn't know the mapping between hugepage memory size
> and the maximum number of KNI.
>   Right? Giving 1G memory, end user doesn't know
> how many KNI interfaces it can support. So the maximum number of KNI interfaces
> still needs to be tuned.

I don't get what you mean. Using rte_kni_init you can be sure that the 
application will be able to pre-allocate the necessary memory to not 
fail during the life-time of an application or die otherwise. If it 
cannot, the user knows what to do => increase memory.

The user doesn't have to start tweaking and playing with DPDK config_*.h 
files, neither get errors at runtime, neither has to recompile DPDK if 
suddenly a certain application wants to be able to create 64 KNI 
interfaces instead of 8.

The creation of all KNI interfaces at boot time is only one use case, a 
degenerate case, which is I guess what you still have in mind.

Our application (and others as a general case) may create, destroy, 
create, destroy during the (big eventually) lifetime of the application. 
You don't want the application to fail during operation due to an 
improper compile flags of DPDK (the define that you propose of 
MAX_NUM_IFACES).

The user needs to be able to control how many memzones are 
pre-allocated, and this has a direct relation with the amount of 
hugepages memory available.

I have the patch v2 with your comments sorted out except this difference 
of opinion with rte_kni_init. I can circulate it and comment on it.

I would also like to know if anyone else besides Helin&I in the mailing 
list has any opinion on this.

thanks
Marc

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-10-10  8:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-29 10:15 [dpdk-dev] [PATCH] KNI: use a memzone pool for KNI alloc/release Marc Sune
2014-10-09  6:01 ` Zhang, Helin
2014-10-09  7:05   ` Marc Sune
2014-10-09  7:32     ` Zhang, Helin
2014-10-09  7:52       ` Marc Sune
2014-10-09  8:33         ` Zhang, Helin
2014-10-09  8:45           ` Marc Sune
2014-10-09  8:57             ` Zhang, Helin
2014-10-09 10:15               ` Marc Sune
2014-10-10  5:25                 ` Zhang, Helin
2014-10-10  6:37                   ` Marc Sune
2014-10-10  7:35                     ` Zhang, Helin
2014-10-10  9:02                       ` Marc Sune

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).