DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] common/cnxk: change MCAM entries management scheme
@ 2021-10-05  2:55 psatheesh
  2021-10-19 19:05 ` Jerin Jacob
  0 siblings, 1 reply; 2+ messages in thread
From: psatheesh @ 2021-10-05  2:55 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, Satheesh Paul

From: Satheesh Paul <psatheesh@marvell.com>

This patch removes the MCAM preallocation scheme. The free
entry cache is removed, and for every flow created an MCAM
allocation request is made to the kernel. Each priority level
has a list of MCAM entries. For every flow rule added, the
MCAM entry obtained from the kernel is checked to see whether
it lies at the correct user-specified priority. If not, the
existing rules are moved across MCAM entries so that the
user-specified priority order is maintained.

Signed-off-by: Satheesh Paul <psatheesh@marvell.com>
Reviewed-by: Kiran Kumar Kokkilagadda <kirankumark@marvell.com>
---
 drivers/common/cnxk/roc_npc.c       | 112 +----
 drivers/common/cnxk/roc_npc_mcam.c  |  24 +-
 drivers/common/cnxk/roc_npc_priv.h  |  33 +-
 drivers/common/cnxk/roc_npc_utils.c | 645 +++++++++++++++-------------
 4 files changed, 387 insertions(+), 427 deletions(-)

diff --git a/drivers/common/cnxk/roc_npc.c b/drivers/common/cnxk/roc_npc.c
index b724ff9401..e878bbc8e2 100644
--- a/drivers/common/cnxk/roc_npc.c
+++ b/drivers/common/cnxk/roc_npc.c
@@ -179,46 +179,6 @@ roc_npc_init(struct roc_npc *roc_npc)
 		return rc;
 	}
 
-	sz = npc->flow_max_priority * sizeof(struct npc_mcam_ents_info);
-	npc->flow_entry_info = plt_zmalloc(sz, 0);
-	if (npc->flow_entry_info == NULL) {
-		plt_err("flow_entry_info alloc failed");
-		rc = NPC_ERR_NO_MEM;
-		goto done;
-	}
-
-	sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
-	npc->free_entries = plt_zmalloc(sz, 0);
-	if (npc->free_entries == NULL) {
-		plt_err("free_entries alloc failed");
-		rc = NPC_ERR_NO_MEM;
-		goto done;
-	}
-
-	sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
-	npc->free_entries_rev = plt_zmalloc(sz, 0);
-	if (npc->free_entries_rev == NULL) {
-		plt_err("free_entries_rev alloc failed");
-		rc = NPC_ERR_NO_MEM;
-		goto done;
-	}
-
-	sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
-	npc->live_entries = plt_zmalloc(sz, 0);
-	if (npc->live_entries == NULL) {
-		plt_err("live_entries alloc failed");
-		rc = NPC_ERR_NO_MEM;
-		goto done;
-	}
-
-	sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
-	npc->live_entries_rev = plt_zmalloc(sz, 0);
-	if (npc->live_entries_rev == NULL) {
-		plt_err("live_entries_rev alloc failed");
-		rc = NPC_ERR_NO_MEM;
-		goto done;
-	}
-
 	sz = npc->flow_max_priority * sizeof(struct npc_flow_list);
 	npc->flow_list = plt_zmalloc(sz, 0);
 	if (npc->flow_list == NULL) {
@@ -227,30 +187,18 @@ roc_npc_init(struct roc_npc *roc_npc)
 		goto done;
 	}
 
+	sz = npc->flow_max_priority * sizeof(struct npc_prio_flow_list_head);
+	npc->prio_flow_list = plt_zmalloc(sz, 0);
+	if (npc->prio_flow_list == NULL) {
+		plt_err("prio_flow_list alloc failed");
+		rc = NPC_ERR_NO_MEM;
+		goto done;
+	}
+
 	npc_mem = mem;
 	for (idx = 0; idx < npc->flow_max_priority; idx++) {
 		TAILQ_INIT(&npc->flow_list[idx]);
-
-		npc->free_entries[idx] =
-			plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
-		mem += bmap_sz;
-
-		npc->free_entries_rev[idx] =
-			plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
-		mem += bmap_sz;
-
-		npc->live_entries[idx] =
-			plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
-		mem += bmap_sz;
-
-		npc->live_entries_rev[idx] =
-			plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
-		mem += bmap_sz;
-
-		npc->flow_entry_info[idx].free_ent = 0;
-		npc->flow_entry_info[idx].live_ent = 0;
-		npc->flow_entry_info[idx].max_id = 0;
-		npc->flow_entry_info[idx].min_id = ~(0);
+		TAILQ_INIT(&npc->prio_flow_list[idx]);
 	}
 
 	npc->rss_grps = NPC_RSS_GRPS;
@@ -281,16 +229,8 @@ roc_npc_init(struct roc_npc *roc_npc)
 done:
 	if (npc->flow_list)
 		plt_free(npc->flow_list);
-	if (npc->live_entries_rev)
-		plt_free(npc->live_entries_rev);
-	if (npc->live_entries)
-		plt_free(npc->live_entries);
-	if (npc->free_entries_rev)
-		plt_free(npc->free_entries_rev);
-	if (npc->free_entries)
-		plt_free(npc->free_entries);
-	if (npc->flow_entry_info)
-		plt_free(npc->flow_entry_info);
+	if (npc->prio_flow_list)
+		plt_free(npc->prio_flow_list);
 	if (npc_mem)
 		plt_free(npc_mem);
 	return rc;
@@ -313,29 +253,9 @@ roc_npc_fini(struct roc_npc *roc_npc)
 		npc->flow_list = NULL;
 	}
 
-	if (npc->live_entries_rev) {
-		plt_free(npc->live_entries_rev);
-		npc->live_entries_rev = NULL;
-	}
-
-	if (npc->live_entries) {
-		plt_free(npc->live_entries);
-		npc->live_entries = NULL;
-	}
-
-	if (npc->free_entries_rev) {
-		plt_free(npc->free_entries_rev);
-		npc->free_entries_rev = NULL;
-	}
-
-	if (npc->free_entries) {
-		plt_free(npc->free_entries);
-		npc->free_entries = NULL;
-	}
-
-	if (npc->flow_entry_info) {
-		plt_free(npc->flow_entry_info);
-		npc->flow_entry_info = NULL;
+	if (npc->prio_flow_list) {
+		plt_free(npc->prio_flow_list);
+		npc->prio_flow_list = NULL;
 	}
 
 	return 0;
@@ -1269,7 +1189,6 @@ int
 roc_npc_flow_destroy(struct roc_npc *roc_npc, struct roc_npc_flow *flow)
 {
 	struct npc *npc = roc_npc_to_npc_priv(roc_npc);
-	struct plt_bitmap *bmap;
 	int rc;
 
 	rc = npc_rss_group_free(npc, flow);
@@ -1290,8 +1209,7 @@ roc_npc_flow_destroy(struct roc_npc *roc_npc, struct roc_npc_flow *flow)
 
 	TAILQ_REMOVE(&npc->flow_list[flow->priority], flow, next);
 
-	bmap = npc->live_entries[flow->priority];
-	plt_bitmap_clear(bmap, flow->mcam_id);
+	npc_delete_prio_list_entry(npc, flow);
 
 	plt_free(flow);
 	return 0;
diff --git a/drivers/common/cnxk/roc_npc_mcam.c b/drivers/common/cnxk/roc_npc_mcam.c
index 91ed2dd511..ba7f89b45b 100644
--- a/drivers/common/cnxk/roc_npc_mcam.c
+++ b/drivers/common/cnxk/roc_npc_mcam.c
@@ -520,7 +520,7 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
 			return rc;
 	}
 
-	entry = npc_check_preallocated_entry_cache(mbox, flow, npc);
+	entry = npc_get_free_mcam_entry(mbox, flow, npc);
 	if (entry < 0) {
 		if (use_ctr)
 			npc_mcam_free_counter(npc, ctr);
@@ -587,6 +587,9 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
 		pf_func = plt_cpu_to_be_16(pf_func);
 		req->entry_data.kw[0] |= ((uint64_t)pf_func << 32);
 		req->entry_data.kw_mask[0] |= ((uint64_t)0xffff << 32);
+
+		flow->mcam_data[0] |= ((uint64_t)pf_func << 32);
+		flow->mcam_mask[0] |= ((uint64_t)0xffff << 32);
 	}
 
 	rc = mbox_process_msg(mbox, (void *)&rsp);
@@ -594,6 +597,7 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
 		return rc;
 
 	flow->mcam_id = entry;
+
 	if (use_ctr)
 		flow->ctr_id = ctr;
 	return 0;
@@ -697,20 +701,9 @@ npc_program_mcam(struct npc *npc, struct npc_parse_state *pst, bool mcam_alloc)
 int
 npc_flow_free_all_resources(struct npc *npc)
 {
-	struct npc_mcam_ents_info *info;
 	struct roc_npc_flow *flow;
-	struct plt_bitmap *bmap;
-	int entry_count = 0;
 	int rc, idx;
 
-	for (idx = 0; idx < npc->flow_max_priority; idx++) {
-		info = &npc->flow_entry_info[idx];
-		entry_count += info->live_ent;
-	}
-
-	if (entry_count == 0)
-		return 0;
-
 	/* Free all MCAM entries allocated */
 	rc = npc_mcam_free_all_entries(npc);
 
@@ -721,14 +714,11 @@ npc_flow_free_all_resources(struct npc *npc)
 			if (flow->ctr_id != NPC_COUNTER_NONE)
 				rc |= npc_mcam_free_counter(npc, flow->ctr_id);
 
+			npc_delete_prio_list_entry(npc, flow);
+
 			TAILQ_REMOVE(&npc->flow_list[idx], flow, next);
 			plt_free(flow);
-			bmap = npc->live_entries[flow->priority];
-			plt_bitmap_clear(bmap, flow->mcam_id);
 		}
-		info = &npc->flow_entry_info[idx];
-		info->free_ent = 0;
-		info->live_ent = 0;
 	}
 	return rc;
 }
diff --git a/drivers/common/cnxk/roc_npc_priv.h b/drivers/common/cnxk/roc_npc_priv.h
index 2567846a77..712302bc5c 100644
--- a/drivers/common/cnxk/roc_npc_priv.h
+++ b/drivers/common/cnxk/roc_npc_priv.h
@@ -344,14 +344,13 @@ struct npc_get_datax_cfg {
 
 TAILQ_HEAD(npc_flow_list, roc_npc_flow);
 
-struct npc_mcam_ents_info {
-	/* Current max & min values of mcam index */
-	uint32_t max_id;
-	uint32_t min_id;
-	uint32_t free_ent;
-	uint32_t live_ent;
+struct npc_prio_flow_entry {
+	struct roc_npc_flow *flow;
+	TAILQ_ENTRY(npc_prio_flow_entry) next;
 };
 
+TAILQ_HEAD(npc_prio_flow_list_head, npc_prio_flow_entry);
+
 struct npc {
 	struct mbox *mbox;			/* Mbox */
 	uint32_t keyx_supp_nmask[NPC_MAX_INTF]; /* nibble mask */
@@ -371,22 +370,8 @@ struct npc {
 	npc_dxcfg_t prx_dxcfg;	     /* intf, lid, lt, extract */
 	npc_fxcfg_t prx_fxcfg;	     /* Flag extract */
 	npc_ld_flags_t prx_lfcfg;    /* KEX LD_Flags CFG */
-	/* mcam entry info per priority level: both free & in-use */
-	struct npc_mcam_ents_info *flow_entry_info;
-	/* Bitmap of free preallocated entries in ascending index &
-	 * descending priority
-	 */
-	struct plt_bitmap **free_entries;
-	/* Bitmap of free preallocated entries in descending index &
-	 * ascending priority
-	 */
-	struct plt_bitmap **free_entries_rev;
-	/* Bitmap of live entries in ascending index & descending priority */
-	struct plt_bitmap **live_entries;
-	/* Bitmap of live entries in descending index & ascending priority */
-	struct plt_bitmap **live_entries_rev;
-	/* Priority bucket wise tail queue of all npc_flow resources */
 	struct npc_flow_list *flow_list;
+	struct npc_prio_flow_list_head *prio_flow_list;
 	struct plt_bitmap *rss_grp_entries;
 };
 
@@ -431,9 +416,9 @@ int npc_parse_lf(struct npc_parse_state *pst);
 int npc_parse_lg(struct npc_parse_state *pst);
 int npc_parse_lh(struct npc_parse_state *pst);
 int npc_mcam_fetch_kex_cfg(struct npc *npc);
-int npc_check_preallocated_entry_cache(struct mbox *mbox,
-				       struct roc_npc_flow *flow,
-				       struct npc *npc);
+int npc_get_free_mcam_entry(struct mbox *mbox, struct roc_npc_flow *flow,
+			    struct npc *npc);
+void npc_delete_prio_list_entry(struct npc *npc, struct roc_npc_flow *flow);
 int npc_flow_free_all_resources(struct npc *npc);
 const struct roc_npc_item_info *
 npc_parse_skip_void_and_any_items(const struct roc_npc_item_info *pattern);
diff --git a/drivers/common/cnxk/roc_npc_utils.c b/drivers/common/cnxk/roc_npc_utils.c
index 5fcb56c35b..ed0ef5c462 100644
--- a/drivers/common/cnxk/roc_npc_utils.c
+++ b/drivers/common/cnxk/roc_npc_utils.c
@@ -259,48 +259,56 @@ npc_update_parse_state(struct npc_parse_state *pst,
 }
 
 static int
-npc_first_set_bit(uint64_t slab)
+npc_initialise_mcam_entry(struct npc *npc, struct roc_npc_flow *flow,
+			  int mcam_id)
 {
-	int num = 0;
+	struct npc_mcam_write_entry_req *req;
+	struct npc_mcam_write_entry_rsq *rsp;
+	int rc = 0, idx;
 
-	if ((slab & 0xffffffff) == 0) {
-		num += 32;
-		slab >>= 32;
-	}
-	if ((slab & 0xffff) == 0) {
-		num += 16;
-		slab >>= 16;
-	}
-	if ((slab & 0xff) == 0) {
-		num += 8;
-		slab >>= 8;
-	}
-	if ((slab & 0xf) == 0) {
-		num += 4;
-		slab >>= 4;
+	req = mbox_alloc_msg_npc_mcam_write_entry(npc->mbox);
+	if (req == NULL)
+		return -ENOSPC;
+	req->set_cntr = 0;
+	req->cntr = 0;
+	req->entry = mcam_id;
+
+	req->intf = (flow->nix_intf == NIX_INTF_RX) ? NPC_MCAM_RX : NPC_MCAM_TX;
+	req->enable_entry = 1;
+	req->entry_data.action = flow->npc_action;
+	req->entry_data.vtag_action = flow->vtag_action;
+
+	for (idx = 0; idx < ROC_NPC_MAX_MCAM_WIDTH_DWORDS; idx++) {
+		req->entry_data.kw[idx] = 0x0;
+		req->entry_data.kw_mask[idx] = 0x0;
 	}
-	if ((slab & 0x3) == 0) {
-		num += 2;
-		slab >>= 2;
+
+	if (flow->nix_intf == NIX_INTF_RX) {
+		req->entry_data.kw[0] |= (uint64_t)npc->channel;
+		req->entry_data.kw_mask[0] |= (BIT_ULL(12) - 1);
+	} else {
+		uint16_t pf_func = (flow->npc_action >> 4) & 0xffff;
+
+		pf_func = plt_cpu_to_be_16(pf_func);
+		req->entry_data.kw[0] |= ((uint64_t)pf_func << 32);
+		req->entry_data.kw_mask[0] |= ((uint64_t)0xffff << 32);
 	}
-	if ((slab & 0x1) == 0)
-		num += 1;
 
-	return num;
+	rc = mbox_process_msg(npc->mbox, (void *)&rsp);
+	if (rc != 0) {
+		plt_err("npc: mcam initialisation write failed");
+		return rc;
+	}
+	return 0;
 }
 
 static int
-npc_shift_lv_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
-		 uint32_t old_ent, uint32_t new_ent)
+npc_shift_mcam_entry(struct mbox *mbox, uint16_t old_ent, uint16_t new_ent)
 {
 	struct npc_mcam_shift_entry_req *req;
 	struct npc_mcam_shift_entry_rsp *rsp;
-	struct npc_flow_list *list;
-	struct roc_npc_flow *flow_iter;
 	int rc = -ENOSPC;
 
-	list = &npc->flow_list[flow->priority];
-
 	/* Old entry is disabled & it's contents are moved to new_entry,
 	 * new entry is enabled finally.
 	 */
@@ -315,323 +323,382 @@ npc_shift_lv_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
 	if (rc)
 		return rc;
 
-	/* Remove old node from list */
-	TAILQ_FOREACH(flow_iter, list, next) {
-		if (flow_iter->mcam_id == old_ent)
-			TAILQ_REMOVE(list, flow_iter, next);
-	}
-
-	/* Insert node with new mcam id at right place */
-	TAILQ_FOREACH(flow_iter, list, next) {
-		if (flow_iter->mcam_id > new_ent)
-			TAILQ_INSERT_BEFORE(flow_iter, flow, next);
-	}
-	return rc;
+	return 0;
 }
 
-/* Exchange all required entries with a given priority level */
+enum SHIFT_DIR {
+	SLIDE_ENTRIES_TO_LOWER_INDEX,
+	SLIDE_ENTRIES_TO_HIGHER_INDEX,
+};
+
 static int
-npc_shift_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
-	      struct npc_mcam_alloc_entry_rsp *rsp, int dir, int prio_lvl)
+npc_slide_mcam_entries(struct mbox *mbox, struct npc *npc, int prio,
+		       uint16_t *free_mcam_id, int dir)
 {
-	struct plt_bitmap *fr_bmp, *fr_bmp_rev, *lv_bmp, *lv_bmp_rev, *bmp;
-	uint32_t e_fr = 0, e_lv = 0, e, e_id = 0, mcam_entries;
-	uint64_t fr_bit_pos = 0, lv_bit_pos = 0, bit_pos = 0;
-	/* Bit position within the slab */
-	uint32_t sl_fr_bit_off = 0, sl_lv_bit_off = 0;
-	/* Overall bit position of the start of slab */
-	/* free & live entry index */
-	int rc_fr = 0, rc_lv = 0, rc = 0, idx = 0;
-	struct npc_mcam_ents_info *ent_info;
-	/* free & live bitmap slab */
-	uint64_t sl_fr = 0, sl_lv = 0, *sl;
-
-	fr_bmp = npc->free_entries[prio_lvl];
-	fr_bmp_rev = npc->free_entries_rev[prio_lvl];
-	lv_bmp = npc->live_entries[prio_lvl];
-	lv_bmp_rev = npc->live_entries_rev[prio_lvl];
-	ent_info = &npc->flow_entry_info[prio_lvl];
-	mcam_entries = npc->mcam_entries;
-
-	/* New entries allocated are always contiguous, but older entries
-	 * already in free/live bitmap can be non-contiguous: so return
-	 * shifted entries should be in non-contiguous format.
-	 */
-	while (idx <= rsp->count) {
-		if (!sl_fr && !sl_lv) {
-			/* Lower index elements to be exchanged */
-			if (dir < 0) {
-				rc_fr = plt_bitmap_scan(fr_bmp, &e_fr, &sl_fr);
-				rc_lv = plt_bitmap_scan(lv_bmp, &e_lv, &sl_lv);
-			} else {
-				rc_fr = plt_bitmap_scan(fr_bmp_rev,
-							&sl_fr_bit_off, &sl_fr);
-				rc_lv = plt_bitmap_scan(lv_bmp_rev,
-							&sl_lv_bit_off, &sl_lv);
-			}
-		}
-
-		if (rc_fr) {
-			fr_bit_pos = npc_first_set_bit(sl_fr);
-			e_fr = sl_fr_bit_off + fr_bit_pos;
-		} else {
-			e_fr = ~(0);
-		}
-
-		if (rc_lv) {
-			lv_bit_pos = npc_first_set_bit(sl_lv);
-			e_lv = sl_lv_bit_off + lv_bit_pos;
-		} else {
-			e_lv = ~(0);
-		}
-
-		/* First entry is from free_bmap */
-		if (e_fr < e_lv) {
-			bmp = fr_bmp;
-			e = e_fr;
-			sl = &sl_fr;
-			bit_pos = fr_bit_pos;
-			if (dir > 0)
-				e_id = mcam_entries - e - 1;
-			else
-				e_id = e;
-		} else {
-			bmp = lv_bmp;
-			e = e_lv;
-			sl = &sl_lv;
-			bit_pos = lv_bit_pos;
-			if (dir > 0)
-				e_id = mcam_entries - e - 1;
-			else
-				e_id = e;
-
-			if (idx < rsp->count)
-				rc = npc_shift_lv_ent(mbox, flow, npc, e_id,
-						      rsp->entry + idx);
+	uint16_t to_mcam_id = 0, from_mcam_id = 0;
+	struct npc_prio_flow_list_head *list;
+	struct npc_prio_flow_entry *curr = 0;
+	int rc = 0;
+
+	list = &npc->prio_flow_list[prio];
+
+	to_mcam_id = *free_mcam_id;
+	if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX)
+		curr = TAILQ_LAST(list, npc_prio_flow_list_head);
+	else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX)
+		curr = TAILQ_FIRST(list);
+
+	while (curr) {
+		from_mcam_id = curr->flow->mcam_id;
+		if ((dir == SLIDE_ENTRIES_TO_HIGHER_INDEX &&
+		     from_mcam_id < to_mcam_id) ||
+		    (dir == SLIDE_ENTRIES_TO_LOWER_INDEX &&
+		     from_mcam_id > to_mcam_id)) {
+			/* Newly allocated entry and the source entry given to
+			 * npc_mcam_shift_entry_req will be in disabled state.
+			 * Initialise and enable before moving an entry into
+			 * this mcam.
+			 */
+			rc = npc_initialise_mcam_entry(npc, curr->flow,
+						       to_mcam_id);
+			if (rc)
+				return rc;
+			rc = npc_shift_mcam_entry(mbox, from_mcam_id,
+						  to_mcam_id);
+			if (rc)
+				return rc;
+			curr->flow->mcam_id = to_mcam_id;
+			to_mcam_id = from_mcam_id;
 		}
 
-		plt_bitmap_clear(bmp, e);
-		plt_bitmap_set(bmp, rsp->entry + idx);
-		/* Update entry list, use non-contiguous
-		 * list now.
-		 */
-		rsp->entry_list[idx] = e_id;
-		*sl &= ~(1UL << bit_pos);
+		if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX)
+			curr = TAILQ_PREV(curr, npc_prio_flow_list_head, next);
+		else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX)
+			curr = TAILQ_NEXT(curr, next);
+	}
 
-		/* Update min & max entry identifiers in current
-		 * priority level.
-		 */
-		if (dir < 0) {
-			ent_info->max_id = rsp->entry + idx;
-			ent_info->min_id = e_id;
-		} else {
-			ent_info->max_id = e_id;
-			ent_info->min_id = rsp->entry;
-		}
+	*free_mcam_id = from_mcam_id;
 
-		idx++;
-	}
-	return rc;
+	return 0;
 }
 
-/* Validate if newly allocated entries lie in the correct priority zone
- * since NPC_MCAM_LOWER_PRIO & NPC_MCAM_HIGHER_PRIO don't ensure zone accuracy.
- * If not properly aligned, shift entries to do so
+/*
+ * The mcam_alloc request is first made with NPC_MCAM_LOWER_PRIO with the last
+ * entry in the requested priority level as the reference entry. If it fails,
+ * the alloc request is retried with NPC_MCAM_HIGHER_PRIO with the first entry
+ * in the next lower priority level as the reference entry. After obtaining
+ * the free MCAM from kernel, we check if it is at the right user requested
+ * priority level. If not, the flow rules are moved across MCAM entries till
+ * the user requested priority levels are met.
+ * The MCAM sorting algorithm works as below.
+ * For any given free MCAM obtained from the kernel, there are 3 possibilities.
+ * Case 1:
+ * There are entries belonging to a higher user priority level (numerically
+ * lesser) at higher mcam indices. In this case, the entries with higher user
+ * priority are slid towards lower indices and a free entry is created at the
+ * higher indices.
+ * Example:
+ * Assume free entry = 1610, user requested priority = 2 and
+ * max user priority levels = 5 with below entries in respective priority
+ * levels.
+ * 0: 1630, 1635, 1641
+ * 1: 1646, 1650, 1651
+ * 2: 1652, 1655, 1660
+ * 3: 1661, 1662, 1663, 1664
+ * 4: 1665, 1667, 1670
+ *
+ * Entries (1630, 1635, 1641, 1646, 1650, 1651) have to be slid down towards
+ * lower indices.
+ * Shifting sequence will be as below:
+ *     1610 <- 1630 <- 1635 <- 1641 <- 1646 <- 1650 <- 1651
+ * Entry 1651 will be freed for writing the new flow. This entry will now
+ * become the head of priority level 2.
+ *
+ * Case 2:
+ * There are entries belonging to a lower user priority level (numerically
+ * bigger) at lower mcam indices. In this case, the entries with lower user
+ * priority are slid towards higher indices and a free entry is created at the
+ * lower indices.
+ *
+ * Example:
+ * free entry = 1653, user requested priority = 0
+ * 0: 1630, 1635, 1641
+ * 1: 1646, 1650, 1651
+ * 2: 1652, 1655, 1660
+ * 3: 1661, 1662, 1663, 1664
+ * 4: 1665, 1667, 1670
+ *
+ * Entries (1646, 1650, 1651, 1652) have to be slid up towards higher
+ * indices.
+ * Shifting sequence will be as below:
+ *     1646 -> 1650 -> 1651 -> 1652 -> 1653
+ * Entry 1646 will be freed for writing the new flow. This entry will now
+ * become the last element in priority level 0.
+ *
+ * Case 3:
+ * Free mcam is at the right place, ie, all higher user priority level
+ * mcams lie in lower indices and all lower user priority level mcams lie in
+ * higher mcam indices.
+ *
+ * The priority level lists are scanned first for case (1) and if the
+ * condition is found true, case(2) is skipped because they are mutually
+ * exclusive. For example, consider below state.
+ * 0: 1630, 1635, 1641
+ * 1: 1646, 1650, 1651
+ * 2: 1652, 1655, 1660
+ * 3: 1661, 1662, 1663, 1664
+ * 4: 1665, 1667, 1670
+ * free entry = 1610, user requested priority = 2
+ *
+ * Case 1: Here the condition is:
+ * "if (requested_prio > prio_idx && free_mcam < tail->flow->mcam_id ){}"
+ * If this condition is true, it means at some higher priority level than
+ * requested priority level, there are entries at higher indices than the given
+ * free mcam. That is, we have found in levels 0,1 there is an mcam X which is
+ * greater than 1610.
+ * If, for any free entry and user req prio, the above condition is true, then
+ * the below case(2) condition will always be false since the lists are kept
+ * sorted. The case(2) condition is:
+ *  "if (requested_prio < prio_idx && free_mcam > head->flow->mcam_id){}"
+ * There can't be entries at lower indices than the free mcam at any
+ * priority level lower than the requested one. That is, here, at levels
+ * 3 & 4 there cannot be any entry less than 1610, because all entries in
+ * 3 & 4 must be greater than X, which was found to be greater than 1610.
  */
+
 static int
-npc_validate_and_shift_prio_ent(struct mbox *mbox, struct roc_npc_flow *flow,
-				struct npc *npc,
-				struct npc_mcam_alloc_entry_rsp *rsp,
-				int req_prio)
+npc_sort_mcams_by_user_prio_level(struct mbox *mbox,
+				  struct npc_prio_flow_entry *flow_list_entry,
+				  struct npc *npc,
+				  struct npc_mcam_alloc_entry_rsp *rsp)
 {
-	int prio_idx = 0, rc = 0, needs_shift = 0, idx, prio = flow->priority;
-	struct npc_mcam_ents_info *info = npc->flow_entry_info;
-	int dir = (req_prio == NPC_MCAM_HIGHER_PRIO) ? 1 : -1;
-	uint32_t tot_ent = 0;
-
-	if (dir < 0)
-		prio_idx = npc->flow_max_priority - 1;
-
-	/* Only live entries needs to be shifted, free entries can just be
-	 * moved by bits manipulation.
-	 */
-
-	/* For dir = -1(NPC_MCAM_LOWER_PRIO), when shifting,
-	 * NPC_MAX_PREALLOC_ENT are exchanged with adjoining higher priority
-	 * level entries(lower indexes).
-	 *
-	 * For dir = +1(NPC_MCAM_HIGHER_PRIO), during shift,
-	 * NPC_MAX_PREALLOC_ENT are exchanged with adjoining lower priority
-	 * level entries(higher indexes) with highest indexes.
-	 */
-	do {
-		tot_ent = info[prio_idx].free_ent + info[prio_idx].live_ent;
-
-		if (dir < 0 && prio_idx != prio &&
-		    rsp->entry > info[prio_idx].max_id && tot_ent) {
-			needs_shift = 1;
-		} else if ((dir > 0) && (prio_idx != prio) &&
-			   (rsp->entry < info[prio_idx].min_id) && tot_ent) {
-			needs_shift = 1;
+	int requested_prio = flow_list_entry->flow->priority;
+	struct npc_prio_flow_entry *head, *tail;
+	struct npc_prio_flow_list_head *list;
+	uint16_t free_mcam = rsp->entry;
+	bool do_reverse_scan = true;
+	int prio_idx = 0, rc = 0;
+
+	while (prio_idx <= npc->flow_max_priority - 1) {
+		list = &npc->prio_flow_list[prio_idx];
+		tail = TAILQ_LAST(list, npc_prio_flow_list_head);
+
+		/* requested priority is lower than current level
+		 * ie, numerically req prio is higher
+		 */
+		if ((requested_prio > prio_idx) && tail) {
+			/* but there are some mcams in current level
+			 * at higher indices, ie, at priority lower
+			 * than free_mcam.
+			 */
+			if (free_mcam < tail->flow->mcam_id) {
+				rc = npc_slide_mcam_entries(
+					mbox, npc, prio_idx, &free_mcam,
+					SLIDE_ENTRIES_TO_LOWER_INDEX);
+				if (rc)
+					return rc;
+				do_reverse_scan = false;
+			}
 		}
+		prio_idx++;
+	}
 
-		if (needs_shift) {
-			needs_shift = 0;
-			rc = npc_shift_ent(mbox, flow, npc, rsp, dir, prio_idx);
-		} else {
-			for (idx = 0; idx < rsp->count; idx++)
-				rsp->entry_list[idx] = rsp->entry + idx;
-		}
-	} while ((prio_idx != prio) && (prio_idx += dir));
+	prio_idx = npc->flow_max_priority - 1;
+	while (prio_idx && do_reverse_scan) {
+		list = &npc->prio_flow_list[prio_idx];
+		head = TAILQ_FIRST(list);
 
+		/* requested priority is higher than current level
+		 * ie, numerically req prio is lower
+		 */
+		if (requested_prio < prio_idx && head) {
+			/* but free mcam is higher than lowest priority
+			 * mcam in current level
+			 */
+			if (free_mcam > head->flow->mcam_id) {
+				rc = npc_slide_mcam_entries(
+					mbox, npc, prio_idx, &free_mcam,
+					SLIDE_ENTRIES_TO_HIGHER_INDEX);
+				if (rc)
+					return rc;
+			}
+		}
+		prio_idx--;
+	}
+	rsp->entry = free_mcam;
 	return rc;
 }
 
-static int
-npc_find_ref_entry(struct npc *npc, int *prio, int prio_lvl)
+static void
+npc_insert_into_flow_list(struct npc *npc, struct npc_prio_flow_entry *entry)
 {
-	struct npc_mcam_ents_info *info = npc->flow_entry_info;
-	int step = 1;
-
-	while (step < npc->flow_max_priority) {
-		if (((prio_lvl + step) < npc->flow_max_priority) &&
-		    info[prio_lvl + step].live_ent) {
-			*prio = NPC_MCAM_HIGHER_PRIO;
-			return info[prio_lvl + step].min_id;
-		}
+	struct npc_prio_flow_list_head *list;
+	struct npc_prio_flow_entry *curr;
 
-		if (((prio_lvl - step) >= 0) &&
-		    info[prio_lvl - step].live_ent) {
-			*prio = NPC_MCAM_LOWER_PRIO;
-			return info[prio_lvl - step].max_id;
+	list = &npc->prio_flow_list[entry->flow->priority];
+	curr = TAILQ_FIRST(list);
+
+	if (curr) {
+		while (curr) {
+			if (entry->flow->mcam_id > curr->flow->mcam_id)
+				curr = TAILQ_NEXT(curr, next);
+			else
+				break;
 		}
-		step++;
+		if (curr)
+			TAILQ_INSERT_BEFORE(curr, entry, next);
+		else
+			TAILQ_INSERT_TAIL(list, entry, next);
+	} else {
+		TAILQ_INSERT_HEAD(list, entry, next);
 	}
-	*prio = NPC_MCAM_ANY_PRIO;
-	return 0;
 }
 
 static int
-npc_fill_entry_cache(struct mbox *mbox, struct roc_npc_flow *flow,
-		     struct npc *npc, uint32_t *free_ent)
+npc_allocate_mcam_entry(struct mbox *mbox, int prio,
+			struct npc_mcam_alloc_entry_rsp *rsp_local,
+			int ref_entry)
 {
-	struct plt_bitmap *free_bmp, *free_bmp_rev, *live_bmp, *live_bmp_rev;
-	struct npc_mcam_alloc_entry_rsp rsp_local;
 	struct npc_mcam_alloc_entry_rsp *rsp_cmd;
 	struct npc_mcam_alloc_entry_req *req;
 	struct npc_mcam_alloc_entry_rsp *rsp;
-	struct npc_mcam_ents_info *info;
-	int rc = -ENOSPC, prio;
-	uint16_t ref_ent, idx;
-
-	info = &npc->flow_entry_info[flow->priority];
-	free_bmp = npc->free_entries[flow->priority];
-	free_bmp_rev = npc->free_entries_rev[flow->priority];
-	live_bmp = npc->live_entries[flow->priority];
-	live_bmp_rev = npc->live_entries_rev[flow->priority];
-
-	ref_ent = npc_find_ref_entry(npc, &prio, flow->priority);
+	int rc = -ENOSPC;
 
 	req = mbox_alloc_msg_npc_mcam_alloc_entry(mbox);
 	if (req == NULL)
 		return rc;
 	req->contig = 1;
-	req->count = npc->flow_prealloc_size;
+	req->count = 1;
 	req->priority = prio;
-	req->ref_entry = ref_ent;
+	req->ref_entry = ref_entry;
 
 	rc = mbox_process_msg(mbox, (void *)&rsp_cmd);
 	if (rc)
 		return rc;
 
-	rsp = &rsp_local;
-	memcpy(rsp, rsp_cmd, sizeof(*rsp));
+	if (!rsp_cmd->count)
+		return -ENOSPC;
 
-	/* Non-first ent cache fill */
-	if (prio != NPC_MCAM_ANY_PRIO) {
-		npc_validate_and_shift_prio_ent(mbox, flow, npc, rsp, prio);
-	} else {
-		/* Copy into response entry list */
-		for (idx = 0; idx < rsp->count; idx++)
-			rsp->entry_list[idx] = rsp->entry + idx;
-	}
-
-	/* Update free entries, reverse free entries list,
-	 * min & max entry ids.
-	 */
-	for (idx = 0; idx < rsp->count; idx++) {
-		if (unlikely(rsp->entry_list[idx] < info->min_id))
-			info->min_id = rsp->entry_list[idx];
+	memcpy(rsp_local, rsp_cmd, sizeof(*rsp));
 
-		if (unlikely(rsp->entry_list[idx] > info->max_id))
-			info->max_id = rsp->entry_list[idx];
+	return 0;
+}
 
-		/* Skip entry to be returned, not to be part of free
-		 * list.
-		 */
-		if (prio == NPC_MCAM_HIGHER_PRIO) {
-			if (unlikely(idx == (rsp->count - 1))) {
-				*free_ent = rsp->entry_list[idx];
-				continue;
+static void
+npc_find_mcam_ref_entry(struct roc_npc_flow *flow, struct npc *npc, int *prio,
+			int *ref_entry, int dir)
+{
+	struct npc_prio_flow_entry *head, *tail;
+	struct npc_prio_flow_list_head *list;
+	int prio_idx = flow->priority;
+
+	if (dir == NPC_MCAM_LOWER_PRIO) {
+		while (prio_idx >= 0) {
+			list = &npc->prio_flow_list[prio_idx];
+			head = TAILQ_FIRST(list);
+			if (head) {
+				*prio = NPC_MCAM_LOWER_PRIO;
+				*ref_entry = head->flow->mcam_id;
+				return;
 			}
-		} else {
-			if (unlikely(!idx)) {
-				*free_ent = rsp->entry_list[idx];
-				continue;
+			prio_idx--;
+		}
+	} else if (dir == NPC_MCAM_HIGHER_PRIO) {
+		prio_idx = flow->priority;
+		while (prio_idx <= npc->flow_max_priority - 1) {
+			list = &npc->prio_flow_list[prio_idx];
+			tail = TAILQ_LAST(list, npc_prio_flow_list_head);
+			if (tail) {
+				*prio = NPC_MCAM_HIGHER_PRIO;
+				*ref_entry = tail->flow->mcam_id;
+				return;
 			}
+			prio_idx++;
 		}
-		info->free_ent++;
-		plt_bitmap_set(free_bmp, rsp->entry_list[idx]);
-		plt_bitmap_set(free_bmp_rev,
-			       npc->mcam_entries - rsp->entry_list[idx] - 1);
 	}
+	*prio = NPC_MCAM_ANY_PRIO;
+	*ref_entry = 0;
+}
 
-	info->live_ent++;
-	plt_bitmap_set(live_bmp, *free_ent);
-	plt_bitmap_set(live_bmp_rev, npc->mcam_entries - *free_ent - 1);
+static int
+npc_alloc_mcam_by_ref_entry(struct mbox *mbox, struct roc_npc_flow *flow,
+			    struct npc *npc,
+			    struct npc_mcam_alloc_entry_rsp *rsp_local)
+{
+	int prio, ref_entry = 0, rc = 0, dir = NPC_MCAM_LOWER_PRIO;
+	bool retry_done = false;
+
+retry:
+	npc_find_mcam_ref_entry(flow, npc, &prio, &ref_entry, dir);
+	rc = npc_allocate_mcam_entry(mbox, prio, rsp_local, ref_entry);
+	if (rc && !retry_done) {
+		plt_info(
+			"npc: Failed to allocate lower priority entry. Retrying for higher priority");
+
+		dir = NPC_MCAM_HIGHER_PRIO;
+		retry_done = true;
+		goto retry;
+	} else if (rc && retry_done) {
+		return rc;
+	}
 
 	return 0;
 }
 
 int
-npc_check_preallocated_entry_cache(struct mbox *mbox, struct roc_npc_flow *flow,
-				   struct npc *npc)
+npc_get_free_mcam_entry(struct mbox *mbox, struct roc_npc_flow *flow,
+			struct npc *npc)
 {
-	struct plt_bitmap *free, *free_rev, *live, *live_rev;
-	uint32_t pos = 0, free_ent = 0, mcam_entries;
-	struct npc_mcam_ents_info *info;
-	uint64_t slab = 0;
-	int rc;
-
-	info = &npc->flow_entry_info[flow->priority];
-
-	free_rev = npc->free_entries_rev[flow->priority];
-	free = npc->free_entries[flow->priority];
-	live_rev = npc->live_entries_rev[flow->priority];
-	live = npc->live_entries[flow->priority];
-	mcam_entries = npc->mcam_entries;
-
-	if (info->free_ent) {
-		rc = plt_bitmap_scan(free, &pos, &slab);
-		if (rc) {
-			/* Get free_ent from free entry bitmap */
-			free_ent = pos + __builtin_ctzll(slab);
-			/* Remove from free bitmaps and add to live ones */
-			plt_bitmap_clear(free, free_ent);
-			plt_bitmap_set(live, free_ent);
-			plt_bitmap_clear(free_rev, mcam_entries - free_ent - 1);
-			plt_bitmap_set(live_rev, mcam_entries - free_ent - 1);
-
-			info->free_ent--;
-			info->live_ent++;
-			return free_ent;
-		}
-		return NPC_ERR_INTERNAL;
-	}
+	struct npc_mcam_alloc_entry_rsp rsp_local;
+	struct npc_prio_flow_entry *new_entry;
+	int rc = 0;
+
+	rc = npc_alloc_mcam_by_ref_entry(mbox, flow, npc, &rsp_local);
 
-	rc = npc_fill_entry_cache(mbox, flow, npc, &free_ent);
 	if (rc)
 		return rc;
 
-	return free_ent;
+	new_entry = plt_zmalloc(sizeof(*new_entry), 0);
+	if (!new_entry)
+		return -ENOSPC;
+
+	new_entry->flow = flow;
+
+	plt_info("npc: kernel allocated MCAM entry %d", rsp_local.entry);
+
+	rc = npc_sort_mcams_by_user_prio_level(mbox, new_entry, npc,
+					       &rsp_local);
+	if (rc)
+		goto err;
+
+	plt_info("npc: allocated MCAM entry after sorting %d", rsp_local.entry);
+	flow->mcam_id = rsp_local.entry;
+	npc_insert_into_flow_list(npc, new_entry);
+
+	return rsp_local.entry;
+err:
+	plt_free(new_entry);
+	return rc;
+}
+
+void
+npc_delete_prio_list_entry(struct npc *npc, struct roc_npc_flow *flow)
+{
+	struct npc_prio_flow_list_head *list;
+	struct npc_prio_flow_entry *curr;
+
+	list = &npc->prio_flow_list[flow->priority];
+	curr = TAILQ_FIRST(list);
+
+	if (!curr)
+		return;
+
+	while (curr) {
+		if (flow->mcam_id == curr->flow->mcam_id) {
+			TAILQ_REMOVE(list, curr, next);
+			plt_free(curr);
+			break;
+		}
+		curr = TAILQ_NEXT(curr, next);
+	}
 }
-- 
2.25.4


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [dpdk-dev] [PATCH] common/cnxk: change MCAM entries management scheme
  2021-10-05  2:55 [dpdk-dev] [PATCH] common/cnxk: change MCAM entries management scheme psatheesh
@ 2021-10-19 19:05 ` Jerin Jacob
  0 siblings, 0 replies; 2+ messages in thread
From: Jerin Jacob @ 2021-10-19 19:05 UTC (permalink / raw)
  To: Satheesh Paul, Ferruh Yigit
  Cc: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, dpdk-dev

On Tue, Oct 5, 2021 at 8:25 AM <psatheesh@marvell.com> wrote:
>
> From: Satheesh Paul <psatheesh@marvell.com>
>
> This patch removes the MCAM preallocation scheme. The free
> entry cache is removed and for every flow created, an MCAM
> allocation request is made to the kernel. Each priority level
> has a list of MCAM entries. For every flow rule added, the
> MCAM entry obtained from kernel is checked if it is at the
> correct user specified priority. If not, the existing rules
> are moved across MCAM entries so that the user specified
> priority is maintained.
>
> Signed-off-by: Satheesh Paul <psatheesh@marvell.com>
> Reviewed-by: Kiran Kumar Kokkilagadda <kirankumark@marvell.com>

Changed the subject to: common/cnxk: improve MCAM entries management

Applied to dpdk-next-net-mrvl/for-next-net. Thanks

> ---
>  drivers/common/cnxk/roc_npc.c       | 112 +----
>  drivers/common/cnxk/roc_npc_mcam.c  |  24 +-
>  drivers/common/cnxk/roc_npc_priv.h  |  33 +-
>  drivers/common/cnxk/roc_npc_utils.c | 645 +++++++++++++++-------------
>  4 files changed, 387 insertions(+), 427 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_npc.c b/drivers/common/cnxk/roc_npc.c
> index b724ff9401..e878bbc8e2 100644
> --- a/drivers/common/cnxk/roc_npc.c
> +++ b/drivers/common/cnxk/roc_npc.c
> @@ -179,46 +179,6 @@ roc_npc_init(struct roc_npc *roc_npc)
>                 return rc;
>         }
>
> -       sz = npc->flow_max_priority * sizeof(struct npc_mcam_ents_info);
> -       npc->flow_entry_info = plt_zmalloc(sz, 0);
> -       if (npc->flow_entry_info == NULL) {
> -               plt_err("flow_entry_info alloc failed");
> -               rc = NPC_ERR_NO_MEM;
> -               goto done;
> -       }
> -
> -       sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
> -       npc->free_entries = plt_zmalloc(sz, 0);
> -       if (npc->free_entries == NULL) {
> -               plt_err("free_entries alloc failed");
> -               rc = NPC_ERR_NO_MEM;
> -               goto done;
> -       }
> -
> -       sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
> -       npc->free_entries_rev = plt_zmalloc(sz, 0);
> -       if (npc->free_entries_rev == NULL) {
> -               plt_err("free_entries_rev alloc failed");
> -               rc = NPC_ERR_NO_MEM;
> -               goto done;
> -       }
> -
> -       sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
> -       npc->live_entries = plt_zmalloc(sz, 0);
> -       if (npc->live_entries == NULL) {
> -               plt_err("live_entries alloc failed");
> -               rc = NPC_ERR_NO_MEM;
> -               goto done;
> -       }
> -
> -       sz = npc->flow_max_priority * sizeof(struct plt_bitmap *);
> -       npc->live_entries_rev = plt_zmalloc(sz, 0);
> -       if (npc->live_entries_rev == NULL) {
> -               plt_err("live_entries_rev alloc failed");
> -               rc = NPC_ERR_NO_MEM;
> -               goto done;
> -       }
> -
>         sz = npc->flow_max_priority * sizeof(struct npc_flow_list);
>         npc->flow_list = plt_zmalloc(sz, 0);
>         if (npc->flow_list == NULL) {
> @@ -227,30 +187,18 @@ roc_npc_init(struct roc_npc *roc_npc)
>                 goto done;
>         }
>
> +       sz = npc->flow_max_priority * sizeof(struct npc_prio_flow_list_head);
> +       npc->prio_flow_list = plt_zmalloc(sz, 0);
> +       if (npc->prio_flow_list == NULL) {
> +               plt_err("prio_flow_list alloc failed");
> +               rc = NPC_ERR_NO_MEM;
> +               goto done;
> +       }
> +
>         npc_mem = mem;
>         for (idx = 0; idx < npc->flow_max_priority; idx++) {
>                 TAILQ_INIT(&npc->flow_list[idx]);
> -
> -               npc->free_entries[idx] =
> -                       plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
> -               mem += bmap_sz;
> -
> -               npc->free_entries_rev[idx] =
> -                       plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
> -               mem += bmap_sz;
> -
> -               npc->live_entries[idx] =
> -                       plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
> -               mem += bmap_sz;
> -
> -               npc->live_entries_rev[idx] =
> -                       plt_bitmap_init(npc->mcam_entries, mem, bmap_sz);
> -               mem += bmap_sz;
> -
> -               npc->flow_entry_info[idx].free_ent = 0;
> -               npc->flow_entry_info[idx].live_ent = 0;
> -               npc->flow_entry_info[idx].max_id = 0;
> -               npc->flow_entry_info[idx].min_id = ~(0);
> +               TAILQ_INIT(&npc->prio_flow_list[idx]);
>         }
>
>         npc->rss_grps = NPC_RSS_GRPS;
> @@ -281,16 +229,8 @@ roc_npc_init(struct roc_npc *roc_npc)
>  done:
>         if (npc->flow_list)
>                 plt_free(npc->flow_list);
> -       if (npc->live_entries_rev)
> -               plt_free(npc->live_entries_rev);
> -       if (npc->live_entries)
> -               plt_free(npc->live_entries);
> -       if (npc->free_entries_rev)
> -               plt_free(npc->free_entries_rev);
> -       if (npc->free_entries)
> -               plt_free(npc->free_entries);
> -       if (npc->flow_entry_info)
> -               plt_free(npc->flow_entry_info);
> +       if (npc->prio_flow_list)
> +               plt_free(npc->prio_flow_list);
>         if (npc_mem)
>                 plt_free(npc_mem);
>         return rc;
> @@ -313,29 +253,9 @@ roc_npc_fini(struct roc_npc *roc_npc)
>                 npc->flow_list = NULL;
>         }
>
> -       if (npc->live_entries_rev) {
> -               plt_free(npc->live_entries_rev);
> -               npc->live_entries_rev = NULL;
> -       }
> -
> -       if (npc->live_entries) {
> -               plt_free(npc->live_entries);
> -               npc->live_entries = NULL;
> -       }
> -
> -       if (npc->free_entries_rev) {
> -               plt_free(npc->free_entries_rev);
> -               npc->free_entries_rev = NULL;
> -       }
> -
> -       if (npc->free_entries) {
> -               plt_free(npc->free_entries);
> -               npc->free_entries = NULL;
> -       }
> -
> -       if (npc->flow_entry_info) {
> -               plt_free(npc->flow_entry_info);
> -               npc->flow_entry_info = NULL;
> +       if (npc->prio_flow_list) {
> +               plt_free(npc->prio_flow_list);
> +               npc->prio_flow_list = NULL;
>         }
>
>         return 0;
> @@ -1269,7 +1189,6 @@ int
>  roc_npc_flow_destroy(struct roc_npc *roc_npc, struct roc_npc_flow *flow)
>  {
>         struct npc *npc = roc_npc_to_npc_priv(roc_npc);
> -       struct plt_bitmap *bmap;
>         int rc;
>
>         rc = npc_rss_group_free(npc, flow);
> @@ -1290,8 +1209,7 @@ roc_npc_flow_destroy(struct roc_npc *roc_npc, struct roc_npc_flow *flow)
>
>         TAILQ_REMOVE(&npc->flow_list[flow->priority], flow, next);
>
> -       bmap = npc->live_entries[flow->priority];
> -       plt_bitmap_clear(bmap, flow->mcam_id);
> +       npc_delete_prio_list_entry(npc, flow);
>
>         plt_free(flow);
>         return 0;
> diff --git a/drivers/common/cnxk/roc_npc_mcam.c b/drivers/common/cnxk/roc_npc_mcam.c
> index 91ed2dd511..ba7f89b45b 100644
> --- a/drivers/common/cnxk/roc_npc_mcam.c
> +++ b/drivers/common/cnxk/roc_npc_mcam.c
> @@ -520,7 +520,7 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
>                         return rc;
>         }
>
> -       entry = npc_check_preallocated_entry_cache(mbox, flow, npc);
> +       entry = npc_get_free_mcam_entry(mbox, flow, npc);
>         if (entry < 0) {
>                 if (use_ctr)
>                         npc_mcam_free_counter(npc, ctr);
> @@ -587,6 +587,9 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
>                 pf_func = plt_cpu_to_be_16(pf_func);
>                 req->entry_data.kw[0] |= ((uint64_t)pf_func << 32);
>                 req->entry_data.kw_mask[0] |= ((uint64_t)0xffff << 32);
> +
> +               flow->mcam_data[0] |= ((uint64_t)pf_func << 32);
> +               flow->mcam_mask[0] |= ((uint64_t)0xffff << 32);
>         }
>
>         rc = mbox_process_msg(mbox, (void *)&rsp);
> @@ -594,6 +597,7 @@ npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow,
>                 return rc;
>
>         flow->mcam_id = entry;
> +
>         if (use_ctr)
>                 flow->ctr_id = ctr;
>         return 0;
> @@ -697,20 +701,9 @@ npc_program_mcam(struct npc *npc, struct npc_parse_state *pst, bool mcam_alloc)
>  int
>  npc_flow_free_all_resources(struct npc *npc)
>  {
> -       struct npc_mcam_ents_info *info;
>         struct roc_npc_flow *flow;
> -       struct plt_bitmap *bmap;
> -       int entry_count = 0;
>         int rc, idx;
>
> -       for (idx = 0; idx < npc->flow_max_priority; idx++) {
> -               info = &npc->flow_entry_info[idx];
> -               entry_count += info->live_ent;
> -       }
> -
> -       if (entry_count == 0)
> -               return 0;
> -
>         /* Free all MCAM entries allocated */
>         rc = npc_mcam_free_all_entries(npc);
>
> @@ -721,14 +714,11 @@ npc_flow_free_all_resources(struct npc *npc)
>                         if (flow->ctr_id != NPC_COUNTER_NONE)
>                                 rc |= npc_mcam_free_counter(npc, flow->ctr_id);
>
> +                       npc_delete_prio_list_entry(npc, flow);
> +
>                         TAILQ_REMOVE(&npc->flow_list[idx], flow, next);
>                         plt_free(flow);
> -                       bmap = npc->live_entries[flow->priority];
> -                       plt_bitmap_clear(bmap, flow->mcam_id);
>                 }
> -               info = &npc->flow_entry_info[idx];
> -               info->free_ent = 0;
> -               info->live_ent = 0;
>         }
>         return rc;
>  }
> diff --git a/drivers/common/cnxk/roc_npc_priv.h b/drivers/common/cnxk/roc_npc_priv.h
> index 2567846a77..712302bc5c 100644
> --- a/drivers/common/cnxk/roc_npc_priv.h
> +++ b/drivers/common/cnxk/roc_npc_priv.h
> @@ -344,14 +344,13 @@ struct npc_get_datax_cfg {
>
>  TAILQ_HEAD(npc_flow_list, roc_npc_flow);
>
> -struct npc_mcam_ents_info {
> -       /* Current max & min values of mcam index */
> -       uint32_t max_id;
> -       uint32_t min_id;
> -       uint32_t free_ent;
> -       uint32_t live_ent;
> +struct npc_prio_flow_entry {
> +       struct roc_npc_flow *flow;
> +       TAILQ_ENTRY(npc_prio_flow_entry) next;
>  };
>
> +TAILQ_HEAD(npc_prio_flow_list_head, npc_prio_flow_entry);
> +
>  struct npc {
>         struct mbox *mbox;                      /* Mbox */
>         uint32_t keyx_supp_nmask[NPC_MAX_INTF]; /* nibble mask */
> @@ -371,22 +370,8 @@ struct npc {
>         npc_dxcfg_t prx_dxcfg;       /* intf, lid, lt, extract */
>         npc_fxcfg_t prx_fxcfg;       /* Flag extract */
>         npc_ld_flags_t prx_lfcfg;    /* KEX LD_Flags CFG */
> -       /* mcam entry info per priority level: both free & in-use */
> -       struct npc_mcam_ents_info *flow_entry_info;
> -       /* Bitmap of free preallocated entries in ascending index &
> -        * descending priority
> -        */
> -       struct plt_bitmap **free_entries;
> -       /* Bitmap of free preallocated entries in descending index &
> -        * ascending priority
> -        */
> -       struct plt_bitmap **free_entries_rev;
> -       /* Bitmap of live entries in ascending index & descending priority */
> -       struct plt_bitmap **live_entries;
> -       /* Bitmap of live entries in descending index & ascending priority */
> -       struct plt_bitmap **live_entries_rev;
> -       /* Priority bucket wise tail queue of all npc_flow resources */
>         struct npc_flow_list *flow_list;
> +       struct npc_prio_flow_list_head *prio_flow_list;
>         struct plt_bitmap *rss_grp_entries;
>  };
>
> @@ -431,9 +416,9 @@ int npc_parse_lf(struct npc_parse_state *pst);
>  int npc_parse_lg(struct npc_parse_state *pst);
>  int npc_parse_lh(struct npc_parse_state *pst);
>  int npc_mcam_fetch_kex_cfg(struct npc *npc);
> -int npc_check_preallocated_entry_cache(struct mbox *mbox,
> -                                      struct roc_npc_flow *flow,
> -                                      struct npc *npc);
> +int npc_get_free_mcam_entry(struct mbox *mbox, struct roc_npc_flow *flow,
> +                           struct npc *npc);
> +void npc_delete_prio_list_entry(struct npc *npc, struct roc_npc_flow *flow);
>  int npc_flow_free_all_resources(struct npc *npc);
>  const struct roc_npc_item_info *
>  npc_parse_skip_void_and_any_items(const struct roc_npc_item_info *pattern);
> diff --git a/drivers/common/cnxk/roc_npc_utils.c b/drivers/common/cnxk/roc_npc_utils.c
> index 5fcb56c35b..ed0ef5c462 100644
> --- a/drivers/common/cnxk/roc_npc_utils.c
> +++ b/drivers/common/cnxk/roc_npc_utils.c
> @@ -259,48 +259,56 @@ npc_update_parse_state(struct npc_parse_state *pst,
>  }
>
>  static int
> -npc_first_set_bit(uint64_t slab)
> +npc_initialise_mcam_entry(struct npc *npc, struct roc_npc_flow *flow,
> +                         int mcam_id)
>  {
> -       int num = 0;
> +       struct npc_mcam_write_entry_req *req;
> +       struct npc_mcam_write_entry_rsq *rsp;
> +       int rc = 0, idx;
>
> -       if ((slab & 0xffffffff) == 0) {
> -               num += 32;
> -               slab >>= 32;
> -       }
> -       if ((slab & 0xffff) == 0) {
> -               num += 16;
> -               slab >>= 16;
> -       }
> -       if ((slab & 0xff) == 0) {
> -               num += 8;
> -               slab >>= 8;
> -       }
> -       if ((slab & 0xf) == 0) {
> -               num += 4;
> -               slab >>= 4;
> +       req = mbox_alloc_msg_npc_mcam_write_entry(npc->mbox);
> +       if (req == NULL)
> +               return -ENOSPC;
> +       req->set_cntr = 0;
> +       req->cntr = 0;
> +       req->entry = mcam_id;
> +
> +       req->intf = (flow->nix_intf == NIX_INTF_RX) ? NPC_MCAM_RX : NPC_MCAM_TX;
> +       req->enable_entry = 1;
> +       req->entry_data.action = flow->npc_action;
> +       req->entry_data.vtag_action = flow->vtag_action;
> +
> +       for (idx = 0; idx < ROC_NPC_MAX_MCAM_WIDTH_DWORDS; idx++) {
> +               req->entry_data.kw[idx] = 0x0;
> +               req->entry_data.kw_mask[idx] = 0x0;
>         }
> -       if ((slab & 0x3) == 0) {
> -               num += 2;
> -               slab >>= 2;
> +
> +       if (flow->nix_intf == NIX_INTF_RX) {
> +               req->entry_data.kw[0] |= (uint64_t)npc->channel;
> +               req->entry_data.kw_mask[0] |= (BIT_ULL(12) - 1);
> +       } else {
> +               uint16_t pf_func = (flow->npc_action >> 4) & 0xffff;
> +
> +               pf_func = plt_cpu_to_be_16(pf_func);
> +               req->entry_data.kw[0] |= ((uint64_t)pf_func << 32);
> +               req->entry_data.kw_mask[0] |= ((uint64_t)0xffff << 32);
>         }
> -       if ((slab & 0x1) == 0)
> -               num += 1;
>
> -       return num;
> +       rc = mbox_process_msg(npc->mbox, (void *)&rsp);
> +       if (rc != 0) {
> +               plt_err("npc: mcam initialisation write failed");
> +               return rc;
> +       }
> +       return 0;
>  }
>
>  static int
> -npc_shift_lv_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
> -                uint32_t old_ent, uint32_t new_ent)
> +npc_shift_mcam_entry(struct mbox *mbox, uint16_t old_ent, uint16_t new_ent)
>  {
>         struct npc_mcam_shift_entry_req *req;
>         struct npc_mcam_shift_entry_rsp *rsp;
> -       struct npc_flow_list *list;
> -       struct roc_npc_flow *flow_iter;
>         int rc = -ENOSPC;
>
> -       list = &npc->flow_list[flow->priority];
> -
>         /* Old entry is disabled & it's contents are moved to new_entry,
>          * new entry is enabled finally.
>          */
> @@ -315,323 +323,382 @@ npc_shift_lv_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
>         if (rc)
>                 return rc;
>
> -       /* Remove old node from list */
> -       TAILQ_FOREACH(flow_iter, list, next) {
> -               if (flow_iter->mcam_id == old_ent)
> -                       TAILQ_REMOVE(list, flow_iter, next);
> -       }
> -
> -       /* Insert node with new mcam id at right place */
> -       TAILQ_FOREACH(flow_iter, list, next) {
> -               if (flow_iter->mcam_id > new_ent)
> -                       TAILQ_INSERT_BEFORE(flow_iter, flow, next);
> -       }
> -       return rc;
> +       return 0;
>  }
>
> -/* Exchange all required entries with a given priority level */
> +enum SHIFT_DIR {
> +       SLIDE_ENTRIES_TO_LOWER_INDEX,
> +       SLIDE_ENTRIES_TO_HIGHER_INDEX,
> +};
> +
>  static int
> -npc_shift_ent(struct mbox *mbox, struct roc_npc_flow *flow, struct npc *npc,
> -             struct npc_mcam_alloc_entry_rsp *rsp, int dir, int prio_lvl)
> +npc_slide_mcam_entries(struct mbox *mbox, struct npc *npc, int prio,
> +                      uint16_t *free_mcam_id, int dir)
>  {
> -       struct plt_bitmap *fr_bmp, *fr_bmp_rev, *lv_bmp, *lv_bmp_rev, *bmp;
> -       uint32_t e_fr = 0, e_lv = 0, e, e_id = 0, mcam_entries;
> -       uint64_t fr_bit_pos = 0, lv_bit_pos = 0, bit_pos = 0;
> -       /* Bit position within the slab */
> -       uint32_t sl_fr_bit_off = 0, sl_lv_bit_off = 0;
> -       /* Overall bit position of the start of slab */
> -       /* free & live entry index */
> -       int rc_fr = 0, rc_lv = 0, rc = 0, idx = 0;
> -       struct npc_mcam_ents_info *ent_info;
> -       /* free & live bitmap slab */
> -       uint64_t sl_fr = 0, sl_lv = 0, *sl;
> -
> -       fr_bmp = npc->free_entries[prio_lvl];
> -       fr_bmp_rev = npc->free_entries_rev[prio_lvl];
> -       lv_bmp = npc->live_entries[prio_lvl];
> -       lv_bmp_rev = npc->live_entries_rev[prio_lvl];
> -       ent_info = &npc->flow_entry_info[prio_lvl];
> -       mcam_entries = npc->mcam_entries;
> -
> -       /* New entries allocated are always contiguous, but older entries
> -        * already in free/live bitmap can be non-contiguous: so return
> -        * shifted entries should be in non-contiguous format.
> -        */
> -       while (idx <= rsp->count) {
> -               if (!sl_fr && !sl_lv) {
> -                       /* Lower index elements to be exchanged */
> -                       if (dir < 0) {
> -                               rc_fr = plt_bitmap_scan(fr_bmp, &e_fr, &sl_fr);
> -                               rc_lv = plt_bitmap_scan(lv_bmp, &e_lv, &sl_lv);
> -                       } else {
> -                               rc_fr = plt_bitmap_scan(fr_bmp_rev,
> -                                                       &sl_fr_bit_off, &sl_fr);
> -                               rc_lv = plt_bitmap_scan(lv_bmp_rev,
> -                                                       &sl_lv_bit_off, &sl_lv);
> -                       }
> -               }
> -
> -               if (rc_fr) {
> -                       fr_bit_pos = npc_first_set_bit(sl_fr);
> -                       e_fr = sl_fr_bit_off + fr_bit_pos;
> -               } else {
> -                       e_fr = ~(0);
> -               }
> -
> -               if (rc_lv) {
> -                       lv_bit_pos = npc_first_set_bit(sl_lv);
> -                       e_lv = sl_lv_bit_off + lv_bit_pos;
> -               } else {
> -                       e_lv = ~(0);
> -               }
> -
> -               /* First entry is from free_bmap */
> -               if (e_fr < e_lv) {
> -                       bmp = fr_bmp;
> -                       e = e_fr;
> -                       sl = &sl_fr;
> -                       bit_pos = fr_bit_pos;
> -                       if (dir > 0)
> -                               e_id = mcam_entries - e - 1;
> -                       else
> -                               e_id = e;
> -               } else {
> -                       bmp = lv_bmp;
> -                       e = e_lv;
> -                       sl = &sl_lv;
> -                       bit_pos = lv_bit_pos;
> -                       if (dir > 0)
> -                               e_id = mcam_entries - e - 1;
> -                       else
> -                               e_id = e;
> -
> -                       if (idx < rsp->count)
> -                               rc = npc_shift_lv_ent(mbox, flow, npc, e_id,
> -                                                     rsp->entry + idx);
> +       uint16_t to_mcam_id = 0, from_mcam_id = 0;
> +       struct npc_prio_flow_list_head *list;
> +       struct npc_prio_flow_entry *curr = 0;
> +       int rc = 0;
> +
> +       list = &npc->prio_flow_list[prio];
> +
> +       to_mcam_id = *free_mcam_id;
> +       if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX)
> +               curr = TAILQ_LAST(list, npc_prio_flow_list_head);
> +       else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX)
> +               curr = TAILQ_FIRST(list);
> +
> +       while (curr) {
> +               from_mcam_id = curr->flow->mcam_id;
> +               if ((dir == SLIDE_ENTRIES_TO_HIGHER_INDEX &&
> +                    from_mcam_id < to_mcam_id) ||
> +                   (dir == SLIDE_ENTRIES_TO_LOWER_INDEX &&
> +                    from_mcam_id > to_mcam_id)) {
> +                       /* Newly allocated entry and the source entry given to
> +                        * npc_mcam_shift_entry_req will be in disabled state.
> +                        * Initialise and enable before moving an entry into
> +                        * this mcam.
> +                        */
> +                       rc = npc_initialise_mcam_entry(npc, curr->flow,
> +                                                      to_mcam_id);
> +                       if (rc)
> +                               return rc;
> +                       rc = npc_shift_mcam_entry(mbox, from_mcam_id,
> +                                                 to_mcam_id);
> +                       if (rc)
> +                               return rc;
> +                       curr->flow->mcam_id = to_mcam_id;
> +                       to_mcam_id = from_mcam_id;
>                 }
>
> -               plt_bitmap_clear(bmp, e);
> -               plt_bitmap_set(bmp, rsp->entry + idx);
> -               /* Update entry list, use non-contiguous
> -                * list now.
> -                */
> -               rsp->entry_list[idx] = e_id;
> -               *sl &= ~(1UL << bit_pos);
> +               if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX)
> +                       curr = TAILQ_PREV(curr, npc_prio_flow_list_head, next);
> +               else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX)
> +                       curr = TAILQ_NEXT(curr, next);
> +       }
>
> -               /* Update min & max entry identifiers in current
> -                * priority level.
> -                */
> -               if (dir < 0) {
> -                       ent_info->max_id = rsp->entry + idx;
> -                       ent_info->min_id = e_id;
> -               } else {
> -                       ent_info->max_id = e_id;
> -                       ent_info->min_id = rsp->entry;
> -               }
> +       *free_mcam_id = from_mcam_id;
>
> -               idx++;
> -       }
> -       return rc;
> +       return 0;
>  }
>
> -/* Validate if newly allocated entries lie in the correct priority zone
> - * since NPC_MCAM_LOWER_PRIO & NPC_MCAM_HIGHER_PRIO don't ensure zone accuracy.
> - * If not properly aligned, shift entries to do so
> +/*
> + * The mcam_alloc request is first made with NPC_MCAM_LOWER_PRIO with the last
> + * entry in the requested priority level as the reference entry. If it fails,
> + * the alloc request is retried with NPC_MCAM_HIGHER_PRIO with the first entry
> + * in the next lower priority level as the reference entry. After obtaining
> + * the free MCAM from kernel, we check if it is at the right user requested
> + * priority level. If not, the flow rules are moved across MCAM entries till
> + * the user requested priority levels are met.
> + * The MCAM sorting algorithm works as below.
> + * For any given free MCAM obtained from the kernel, there are 3 possibilities.
> + * Case 1:
> + * There are entries belonging to higher user priority level (numerically
> + * lesser) in higher mcam indices. In this case, the entries with higher user
> + * priority are slid towards lower indices and a free entry is created in the
> + * higher indices.
> + * Example:
> + * Assume free entry = 1610, user requested priority = 2 and
> + * max user priority levels = 5 with below entries in respective priority
> + * levels.
> + * 0: 1630, 1635, 1641
> + * 1: 1646, 1650, 1651
> + * 2: 1652, 1655, 1660
> + * 3: 1661, 1662, 1663, 1664
> + * 4: 1665, 1667, 1670
> + *
> + * Entries (1630, 1635, 1641, 1646, 1650, 1651) have to be slid down towards
> + * lower indices.
> + * Shifting sequence will be as below:
> + *     1610 <- 1630 <- 1635 <- 1641 <- 1646 <- 1650 <- 1651
> + * Entry 1651 will be freed for writing the new flow. This entry will now
> + * become the head of priority level 2.
> + *
> + * Case 2:
> + * There are entries belonging to lower user priority level (numerically
> + * bigger) in lower mcam indices. In this case, the entries with lower user
> + * priority are slid towards higher indices and a free entry is created in the
> + * lower indices.
> + *
> + * Example:
> + * free entry = 1653, user requested priority = 0
> + * 0: 1630, 1635, 1641
> + * 1: 1646, 1650, 1651
> + * 2: 1652, 1655, 1660
> + * 3: 1661, 1662, 1663, 1664
> + * 4: 1665, 1667, 1670
> + *
> + * Entries (1646, 1650, 1651, 1652) have to be slid up towards higher
> + * indices.
> + * Shifting sequence will be as below:
> + *     1646 -> 1650 -> 1651 -> 1652 -> 1653
> + * Entry 1646 will be freed for writing the new flow. This entry will now
> + * become the last element in priority level 0.
> + *
> + * Case 3:
> + * Free mcam is at the right place, ie, all higher user priority level
> + * mcams lie in lower indices and all lower user priority level mcams lie in
> + * higher mcam indices.
> + *
> + * The priority level lists are scanned first for case (1) and if the
> + * condition is found true, case(2) is skipped because they are mutually
> + * exclusive. For example, consider below state.
> + * 0: 1630, 1635, 1641
> + * 1: 1646, 1650, 1651
> + * 2: 1652, 1655, 1660
> + * 3: 1661, 1662, 1663, 1664
> + * 4: 1665, 1667, 1670
> + * free entry = 1610, user requested priority = 2
> + *
> + * Case 1: Here the condition is;
> + * "if (requested_prio > prio_idx && free_mcam < tail->flow->mcam_id ){}"
> + * If this condition is true, it means at some higher priority level than
> + * requested priority level, there are entries at higher indices than the given
> + * free mcam. That is, we have found in levels 0,1 there is an mcam X which is
> + * greater than 1610.
> + * If, for any free entry and user req prio, the above condition is true, then
> + * the below case(2) condition will always be false since the lists are kept
> + * sorted. The case(2) condition is;
> + *  "if (requested_prio < prio_idx && free_mcam > head->flow->mcam_id){}"
> + * There can't be entries at lower indices than the free mcam at priority
> + * levels lower than the requested priority level. That is, here, at levels
> + * 3 & 4 there cannot be any entry less than 1610, because all entries in 3 & 4
> + * must be greater than X which was found to be greater than 1610 earlier.
>   */
> +
>  static int
> -npc_validate_and_shift_prio_ent(struct mbox *mbox, struct roc_npc_flow *flow,
> -                               struct npc *npc,
> -                               struct npc_mcam_alloc_entry_rsp *rsp,
> -                               int req_prio)
> +npc_sort_mcams_by_user_prio_level(struct mbox *mbox,
> +                                 struct npc_prio_flow_entry *flow_list_entry,
> +                                 struct npc *npc,
> +                                 struct npc_mcam_alloc_entry_rsp *rsp)
>  {
> -       int prio_idx = 0, rc = 0, needs_shift = 0, idx, prio = flow->priority;
> -       struct npc_mcam_ents_info *info = npc->flow_entry_info;
> -       int dir = (req_prio == NPC_MCAM_HIGHER_PRIO) ? 1 : -1;
> -       uint32_t tot_ent = 0;
> -
> -       if (dir < 0)
> -               prio_idx = npc->flow_max_priority - 1;
> -
> -       /* Only live entries needs to be shifted, free entries can just be
> -        * moved by bits manipulation.
> -        */
> -
> -       /* For dir = -1(NPC_MCAM_LOWER_PRIO), when shifting,
> -        * NPC_MAX_PREALLOC_ENT are exchanged with adjoining higher priority
> -        * level entries(lower indexes).
> -        *
> -        * For dir = +1(NPC_MCAM_HIGHER_PRIO), during shift,
> -        * NPC_MAX_PREALLOC_ENT are exchanged with adjoining lower priority
> -        * level entries(higher indexes) with highest indexes.
> -        */
> -       do {
> -               tot_ent = info[prio_idx].free_ent + info[prio_idx].live_ent;
> -
> -               if (dir < 0 && prio_idx != prio &&
> -                   rsp->entry > info[prio_idx].max_id && tot_ent) {
> -                       needs_shift = 1;
> -               } else if ((dir > 0) && (prio_idx != prio) &&
> -                          (rsp->entry < info[prio_idx].min_id) && tot_ent) {
> -                       needs_shift = 1;
> +       int requested_prio = flow_list_entry->flow->priority;
> +       struct npc_prio_flow_entry *head, *tail;
> +       struct npc_prio_flow_list_head *list;
> +       uint16_t free_mcam = rsp->entry;
> +       bool do_reverse_scan = true;
> +       int prio_idx = 0, rc = 0;
> +
> +       while (prio_idx <= npc->flow_max_priority - 1) {
> +               list = &npc->prio_flow_list[prio_idx];
> +               tail = TAILQ_LAST(list, npc_prio_flow_list_head);
> +
> +               /* requested priority is lower than current level
> +                * ie, numerically req prio is higher
> +                */
> +               if ((requested_prio > prio_idx) && tail) {
> +                       /* but there are some mcams in current level
> +                        * at higher indices, ie, at priority lower
> +                        * than free_mcam.
> +                        */
> +                       if (free_mcam < tail->flow->mcam_id) {
> +                               rc = npc_slide_mcam_entries(
> +                                       mbox, npc, prio_idx, &free_mcam,
> +                                       SLIDE_ENTRIES_TO_LOWER_INDEX);
> +                               if (rc)
> +                                       return rc;
> +                               do_reverse_scan = false;
> +                       }
>                 }
> +               prio_idx++;
> +       }
>
> -               if (needs_shift) {
> -                       needs_shift = 0;
> -                       rc = npc_shift_ent(mbox, flow, npc, rsp, dir, prio_idx);
> -               } else {
> -                       for (idx = 0; idx < rsp->count; idx++)
> -                               rsp->entry_list[idx] = rsp->entry + idx;
> -               }
> -       } while ((prio_idx != prio) && (prio_idx += dir));
> +       prio_idx = npc->flow_max_priority - 1;
> +       while (prio_idx && do_reverse_scan) {
> +               list = &npc->prio_flow_list[prio_idx];
> +               head = TAILQ_FIRST(list);
>
> +               /* requested priority is higher than current level
> +                * ie, numerically req prio is lower
> +                */
> +               if (requested_prio < prio_idx && head) {
> +                       /* but free mcam is at a higher index than the
> +                        * first (lowest index) mcam in current level
> +                        */
> +                       if (free_mcam > head->flow->mcam_id) {
> +                               rc = npc_slide_mcam_entries(
> +                                       mbox, npc, prio_idx, &free_mcam,
> +                                       SLIDE_ENTRIES_TO_HIGHER_INDEX);
> +                               if (rc)
> +                                       return rc;
> +                       }
> +               }
> +               prio_idx--;
> +       }
> +       rsp->entry = free_mcam;
>         return rc;
>  }
>
> -static int
> -npc_find_ref_entry(struct npc *npc, int *prio, int prio_lvl)
> +static void
> +npc_insert_into_flow_list(struct npc *npc, struct npc_prio_flow_entry *entry)
>  {
> -       struct npc_mcam_ents_info *info = npc->flow_entry_info;
> -       int step = 1;
> -
> -       while (step < npc->flow_max_priority) {
> -               if (((prio_lvl + step) < npc->flow_max_priority) &&
> -                   info[prio_lvl + step].live_ent) {
> -                       *prio = NPC_MCAM_HIGHER_PRIO;
> -                       return info[prio_lvl + step].min_id;
> -               }
> +       struct npc_prio_flow_list_head *list;
> +       struct npc_prio_flow_entry *curr;
>
> -               if (((prio_lvl - step) >= 0) &&
> -                   info[prio_lvl - step].live_ent) {
> -                       *prio = NPC_MCAM_LOWER_PRIO;
> -                       return info[prio_lvl - step].max_id;
> +       list = &npc->prio_flow_list[entry->flow->priority];
> +       curr = TAILQ_FIRST(list);
> +
> +       if (curr) {
> +               while (curr) {
> +                       if (entry->flow->mcam_id > curr->flow->mcam_id)
> +                               curr = TAILQ_NEXT(curr, next);
> +                       else
> +                               break;
>                 }
> -               step++;
> +               if (curr)
> +                       TAILQ_INSERT_BEFORE(curr, entry, next);
> +               else
> +                       TAILQ_INSERT_TAIL(list, entry, next);
> +       } else {
> +               TAILQ_INSERT_HEAD(list, entry, next);
>         }
> -       *prio = NPC_MCAM_ANY_PRIO;
> -       return 0;
>  }
>
>  static int
> -npc_fill_entry_cache(struct mbox *mbox, struct roc_npc_flow *flow,
> -                    struct npc *npc, uint32_t *free_ent)
> +npc_allocate_mcam_entry(struct mbox *mbox, int prio,
> +                       struct npc_mcam_alloc_entry_rsp *rsp_local,
> +                       int ref_entry)
>  {
> -       struct plt_bitmap *free_bmp, *free_bmp_rev, *live_bmp, *live_bmp_rev;
> -       struct npc_mcam_alloc_entry_rsp rsp_local;
>         struct npc_mcam_alloc_entry_rsp *rsp_cmd;
>         struct npc_mcam_alloc_entry_req *req;
>         struct npc_mcam_alloc_entry_rsp *rsp;
> -       struct npc_mcam_ents_info *info;
> -       int rc = -ENOSPC, prio;
> -       uint16_t ref_ent, idx;
> -
> -       info = &npc->flow_entry_info[flow->priority];
> -       free_bmp = npc->free_entries[flow->priority];
> -       free_bmp_rev = npc->free_entries_rev[flow->priority];
> -       live_bmp = npc->live_entries[flow->priority];
> -       live_bmp_rev = npc->live_entries_rev[flow->priority];
> -
> -       ref_ent = npc_find_ref_entry(npc, &prio, flow->priority);
> +       int rc = -ENOSPC;
>
>         req = mbox_alloc_msg_npc_mcam_alloc_entry(mbox);
>         if (req == NULL)
>                 return rc;
>         req->contig = 1;
> -       req->count = npc->flow_prealloc_size;
> +       req->count = 1;
>         req->priority = prio;
> -       req->ref_entry = ref_ent;
> +       req->ref_entry = ref_entry;
>
>         rc = mbox_process_msg(mbox, (void *)&rsp_cmd);
>         if (rc)
>                 return rc;
>
> -       rsp = &rsp_local;
> -       memcpy(rsp, rsp_cmd, sizeof(*rsp));
> +       if (!rsp_cmd->count)
> +               return -ENOSPC;
>
> -       /* Non-first ent cache fill */
> -       if (prio != NPC_MCAM_ANY_PRIO) {
> -               npc_validate_and_shift_prio_ent(mbox, flow, npc, rsp, prio);
> -       } else {
> -               /* Copy into response entry list */
> -               for (idx = 0; idx < rsp->count; idx++)
> -                       rsp->entry_list[idx] = rsp->entry + idx;
> -       }
> -
> -       /* Update free entries, reverse free entries list,
> -        * min & max entry ids.
> -        */
> -       for (idx = 0; idx < rsp->count; idx++) {
> -               if (unlikely(rsp->entry_list[idx] < info->min_id))
> -                       info->min_id = rsp->entry_list[idx];
> +       memcpy(rsp_local, rsp_cmd, sizeof(*rsp));
>
> -               if (unlikely(rsp->entry_list[idx] > info->max_id))
> -                       info->max_id = rsp->entry_list[idx];
> +       return 0;
> +}
>
> -               /* Skip entry to be returned, not to be part of free
> -                * list.
> -                */
> -               if (prio == NPC_MCAM_HIGHER_PRIO) {
> -                       if (unlikely(idx == (rsp->count - 1))) {
> -                               *free_ent = rsp->entry_list[idx];
> -                               continue;
> +static void
> +npc_find_mcam_ref_entry(struct roc_npc_flow *flow, struct npc *npc, int *prio,
> +                       int *ref_entry, int dir)
> +{
> +       struct npc_prio_flow_entry *head, *tail;
> +       struct npc_prio_flow_list_head *list;
> +       int prio_idx = flow->priority;
> +
> +       if (dir == NPC_MCAM_LOWER_PRIO) {
> +               while (prio_idx >= 0) {
> +                       list = &npc->prio_flow_list[prio_idx];
> +                       head = TAILQ_FIRST(list);
> +                       if (head) {
> +                               *prio = NPC_MCAM_LOWER_PRIO;
> +                               *ref_entry = head->flow->mcam_id;
> +                               return;
>                         }
> -               } else {
> -                       if (unlikely(!idx)) {
> -                               *free_ent = rsp->entry_list[idx];
> -                               continue;
> +                       prio_idx--;
> +               }
> +       } else if (dir == NPC_MCAM_HIGHER_PRIO) {
> +               prio_idx = flow->priority;
> +               while (prio_idx <= npc->flow_max_priority - 1) {
> +                       list = &npc->prio_flow_list[prio_idx];
> +                       tail = TAILQ_LAST(list, npc_prio_flow_list_head);
> +                       if (tail) {
> +                               *prio = NPC_MCAM_HIGHER_PRIO;
> +                               *ref_entry = tail->flow->mcam_id;
> +                               return;
>                         }
> +                       prio_idx++;
>                 }
> -               info->free_ent++;
> -               plt_bitmap_set(free_bmp, rsp->entry_list[idx]);
> -               plt_bitmap_set(free_bmp_rev,
> -                              npc->mcam_entries - rsp->entry_list[idx] - 1);
>         }
> +       *prio = NPC_MCAM_ANY_PRIO;
> +       *ref_entry = 0;
> +}
>
> -       info->live_ent++;
> -       plt_bitmap_set(live_bmp, *free_ent);
> -       plt_bitmap_set(live_bmp_rev, npc->mcam_entries - *free_ent - 1);
> +static int
> +npc_alloc_mcam_by_ref_entry(struct mbox *mbox, struct roc_npc_flow *flow,
> +                           struct npc *npc,
> +                           struct npc_mcam_alloc_entry_rsp *rsp_local)
> +{
> +       int prio, ref_entry = 0, rc = 0, dir = NPC_MCAM_LOWER_PRIO;
> +       bool retry_done = false;
> +
> +retry:
> +       npc_find_mcam_ref_entry(flow, npc, &prio, &ref_entry, dir);
> +       rc = npc_allocate_mcam_entry(mbox, prio, rsp_local, ref_entry);
> +       if (rc && !retry_done) {
> +               plt_info(
> +                       "npc: Failed to allocate lower priority entry. Retrying for higher priority");
> +
> +               dir = NPC_MCAM_HIGHER_PRIO;
> +               retry_done = true;
> +               goto retry;
> +       } else if (rc && retry_done) {
> +               return rc;
> +       }
>
>         return 0;
>  }
>
>  int
> -npc_check_preallocated_entry_cache(struct mbox *mbox, struct roc_npc_flow *flow,
> -                                  struct npc *npc)
> +npc_get_free_mcam_entry(struct mbox *mbox, struct roc_npc_flow *flow,
> +                       struct npc *npc)
>  {
> -       struct plt_bitmap *free, *free_rev, *live, *live_rev;
> -       uint32_t pos = 0, free_ent = 0, mcam_entries;
> -       struct npc_mcam_ents_info *info;
> -       uint64_t slab = 0;
> -       int rc;
> -
> -       info = &npc->flow_entry_info[flow->priority];
> -
> -       free_rev = npc->free_entries_rev[flow->priority];
> -       free = npc->free_entries[flow->priority];
> -       live_rev = npc->live_entries_rev[flow->priority];
> -       live = npc->live_entries[flow->priority];
> -       mcam_entries = npc->mcam_entries;
> -
> -       if (info->free_ent) {
> -               rc = plt_bitmap_scan(free, &pos, &slab);
> -               if (rc) {
> -                       /* Get free_ent from free entry bitmap */
> -                       free_ent = pos + __builtin_ctzll(slab);
> -                       /* Remove from free bitmaps and add to live ones */
> -                       plt_bitmap_clear(free, free_ent);
> -                       plt_bitmap_set(live, free_ent);
> -                       plt_bitmap_clear(free_rev, mcam_entries - free_ent - 1);
> -                       plt_bitmap_set(live_rev, mcam_entries - free_ent - 1);
> -
> -                       info->free_ent--;
> -                       info->live_ent++;
> -                       return free_ent;
> -               }
> -               return NPC_ERR_INTERNAL;
> -       }
> +       struct npc_mcam_alloc_entry_rsp rsp_local;
> +       struct npc_prio_flow_entry *new_entry;
> +       int rc = 0;
> +
> +       rc = npc_alloc_mcam_by_ref_entry(mbox, flow, npc, &rsp_local);
>
> -       rc = npc_fill_entry_cache(mbox, flow, npc, &free_ent);
>         if (rc)
>                 return rc;
>
> -       return free_ent;
> +       new_entry = plt_zmalloc(sizeof(*new_entry), 0);
> +       if (!new_entry)
> +               return -ENOSPC;
> +
> +       new_entry->flow = flow;
> +
> +       plt_info("npc: kernel allocated MCAM entry %d", rsp_local.entry);
> +
> +       rc = npc_sort_mcams_by_user_prio_level(mbox, new_entry, npc,
> +                                              &rsp_local);
> +       if (rc)
> +               goto err;
> +
> +       plt_info("npc: allocated MCAM entry after sorting %d", rsp_local.entry);
> +       flow->mcam_id = rsp_local.entry;
> +       npc_insert_into_flow_list(npc, new_entry);
> +
> +       return rsp_local.entry;
> +err:
> +       plt_free(new_entry);
> +       return rc;
> +}
> +
> +void
> +npc_delete_prio_list_entry(struct npc *npc, struct roc_npc_flow *flow)
> +{
> +       struct npc_prio_flow_list_head *list;
> +       struct npc_prio_flow_entry *curr;
> +
> +       list = &npc->prio_flow_list[flow->priority];
> +       curr = TAILQ_FIRST(list);
> +
> +       if (!curr)
> +               return;
> +
> +       while (curr) {
> +               if (flow->mcam_id == curr->flow->mcam_id) {
> +                       TAILQ_REMOVE(list, curr, next);
> +                       plt_free(curr);
> +                       break;
> +               }
> +               curr = TAILQ_NEXT(curr, next);
> +       }
>  }
> --
> 2.25.4
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-10-19 19:05 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-05  2:55 [dpdk-dev] [PATCH] common/cnxk: change MCAM entries management scheme psatheesh
2021-10-19 19:05 ` Jerin Jacob

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).