DPDK patches and discussions
* [PATCH v1 1/1] ethdev: introduce pool sort capability
@ 2022-08-12 10:46 Hanumanth Pothula
  2022-08-12 13:27 ` Morten Brørup
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
  0 siblings, 2 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-08-12 10:46 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

Presently, the 'Buffer Split' feature supports sending multiple
segments of the received packet to the PMD, which programs the HW
to receive the packet in segments from different pools.

This patch extends the feature to support the pool sort capability.
Some HW has support for choosing memory pools based on the
packet's size. The pool sort capability allows the PMD to choose a
memory pool based on the packet's length.

This is often useful for saving memory: the application can create
a different pool to steer each specific packet size to, thus
enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With the pool sort capability enabled in SW, an application may create
three pools of different sizes and pass them to the PMD, allowing the
PMD to program the HW based on packet lengths, so that packets smaller
than 2K are received on pool-1, packets with lengths between 2K and 4K
are received on pool-2, and packets larger than 4K are received on
pool-3.

The following two capabilities are added to the rte_eth_rxseg_capa
structure,
1. pool_sort --> indicates that the pool sort capability is supported by HW.
2. max_npool --> max number of pools supported by HW.

Defined a new structure, rte_eth_rxseg_sort, to be used only when the
pool sort capability is present. If required, this may be extended
further to support more configurations.
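
For reference, a minimal usage sketch of the proposed API (the pools,
lengths and queue parameters below are illustrative assumptions, and
error handling is omitted):

	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rx_conf;
	struct rte_eth_rxseg rx_seg[3];

	rte_eth_dev_info_get(port_id, &dev_info);
	/* An application would check dev_info.rx_seg_capa here. */

	memset(rx_seg, 0, sizeof(rx_seg));
	/* In this version the mode is carried in each rx_seg element. */
	rx_seg[0].mode_flag = RTE_ETH_RXSEG_MODE_SORT;
	rx_seg[0].sort.mp = pool_2k; /* assumed pool; data room >= 2K + headroom */
	rx_seg[0].sort.length = 2048;
	rx_seg[1].mode_flag = RTE_ETH_RXSEG_MODE_SORT;
	rx_seg[1].sort.mp = pool_4k; /* assumed pool; data room >= 4K + headroom */
	rx_seg[1].sort.length = 4096;
	rx_seg[2].mode_flag = RTE_ETH_RXSEG_MODE_SORT;
	rx_seg[2].sort.mp = pool_8k; /* assumed pool; data room >= 8K + headroom */
	rx_seg[2].sort.length = 8192;

	rx_conf = dev_info.default_rxconf;
	rx_conf.offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
	rx_conf.rx_seg = rx_seg;
	rx_conf.rx_nseg = 3;
	/* mp argument is NULL; the pools come via rx_conf.rx_seg. */
	rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
			       &rx_conf, NULL);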

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
Change-Id: I5a2485a7919616902c468c767b5c01834d4a2c27
---
 lib/ethdev/rte_ethdev.c | 81 ++++++++++++++++++++++++++++++++++++++---
 lib/ethdev/rte_ethdev.h | 46 +++++++++++++++++++++--
 2 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..e21a651787 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1634,6 +1634,54 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg_sort *rx_seg,
+			     uint16_t n_seg, uint32_t *mbp_buf_size,
+			     const struct rte_eth_dev_info *dev_info)
+{
+	const struct rte_eth_rxseg_capa *seg_capa = &dev_info->rx_seg_capa;
+	uint16_t seg_idx;
+
+	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid capabilities, multi_pools:%d different length segments %u exceed supported %u\n",
+			       seg_capa->multi_pools, n_seg, seg_capa->max_npool);
+		return -EINVAL;
+	}
+
+	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
+		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
+		uint32_t length = rx_seg[seg_idx].length;
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
+			return -EINVAL;
+		}
+
+		if (mpl->private_data_size <
+			sizeof(struct rte_pktmbuf_pool_private)) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s private_data_size %u < %u\n",
+				       mpl->name, mpl->private_data_size,
+				       (unsigned int)sizeof
+					(struct rte_pktmbuf_pool_private));
+			return -ENOSPC;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
+		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u\n",
+				       mpl->name, *mbp_buf_size,
+				       length);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1693,7 +1741,11 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 		}
 		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
 		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
-		length = length != 0 ? length : *mbp_buf_size;
+		/* On segment length == 0, update the segment's length with
+		 * the pool's data room size minus the headroom, to make sure
+		 * enough space is accommodated for the headroom.
+		 */
+		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
 		if (*mbp_buf_size < length + offset) {
 			RTE_ETHDEV_LOG(ERR,
 				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
@@ -1765,6 +1817,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		}
 	} else {
 		const struct rte_eth_rxseg_split *rx_seg;
+		const struct rte_eth_rxseg_sort *rx_sort;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
@@ -1774,13 +1827,31 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			return -EINVAL;
 		}
 
-		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
 		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
-			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
-							   &mbp_buf_size,
-							   &dev_info);
+			ret = -1; /* To make sure at least one of the conditions below becomes true */
+
+			/* Check that both the NIC and the application support the buffer-split capability */
+			if (dev_info.rx_seg_capa.mode_flag == RTE_ETH_RXSEG_MODE_SPLIT &&
+			    rx_conf->rx_seg->mode_flag == RTE_ETH_RXSEG_MODE_SPLIT) {
+				rx_seg = (const struct rte_eth_rxseg_split *)
+					 &(rx_conf->rx_seg->split);
+				ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
+								   &mbp_buf_size,
+								   &dev_info);
+			}
+
+			/* Check that both the NIC and the application support the pool-sort capability */
+			if (dev_info.rx_seg_capa.mode_flag == RTE_ETH_RXSEG_MODE_SORT &&
+			    rx_conf->rx_seg->mode_flag == RTE_ETH_RXSEG_MODE_SORT) {
+				rx_sort = (const struct rte_eth_rxseg_sort *)
+					  &(rx_conf->rx_seg->sort);
+				ret = rte_eth_rx_queue_check_sort(rx_sort, n_seg,
+								  &mbp_buf_size,
+								  &dev_info);
+			}
+
 			if (ret != 0)
 				return ret;
 		} else {
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index de9e970d4d..9ff8ba8085 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1204,16 +1204,53 @@ struct rte_eth_rxseg_split {
 	uint32_t reserved; /**< Reserved field. */
 };
 
+/**
+ * The pool sort capability allows the PMD to choose a memory pool based on
+ * the packet's length. That is, the PMD programs the HW to receive packets
+ * from different pools, based on the packet's length.
+ *
+ * This is often useful for saving memory: the application can create a
+ * different pool to steer each specific packet size to, thus enabling
+ * effective use of memory.
+ */
+struct rte_eth_rxseg_sort {
+	struct rte_mempool *mp; /**< Memory pool to allocate packets from. */
+	uint16_t length; /**< Packet data length. */
+	uint32_t reserved; /**< Reserved field. */
+};
+
+enum rte_eth_rxseg_mode {
+	/**
+	 * Buffer split mode: PMD splits the received packets into multiple segments.
+	 * @see struct rte_eth_rxseg_split
+	 */
+	RTE_ETH_RXSEG_MODE_SPLIT = RTE_BIT64(0),
+	/**
+	 * Pool sort mode: PMD chooses a memory pool based on the packet's length.
+	 * @see struct rte_eth_rxseg_sort
+	 */
+	RTE_ETH_RXSEG_MODE_SORT  = RTE_BIT64(1),
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this structure may change without prior notice.
  *
  * A common structure used to describe Rx packet segment properties.
  */
-union rte_eth_rxseg {
+struct rte_eth_rxseg {
+
+	/**
+	 * PMD may support more than one rxseg mode. This allows the
+	 * application to choose which mode to enable.
+	 */
+	enum rte_eth_rxseg_mode mode_flag;
+
 	/* The settings for buffer split offload. */
 	struct rte_eth_rxseg_split split;
-	/* The other features settings should be added here. */
+
+	/* The settings for packet sort offload. */
+	struct rte_eth_rxseg_sort sort;
 };
 
 /**
@@ -1246,7 +1283,7 @@ struct rte_eth_rxconf {
 	 * The supported capabilities of receiving segmentation is reported
 	 * in rte_eth_dev_info.rx_seg_capa field.
 	 */
-	union rte_eth_rxseg *rx_seg;
+	struct rte_eth_rxseg *rx_seg;
 
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
@@ -1831,6 +1868,9 @@ struct rte_eth_rxseg_capa {
 	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
 	uint32_t offset_align_log2:4; /**< Required offset alignment. */
 	uint16_t max_nseg; /**< Maximum amount of segments to split. */
+	/** Maximum number of pools that the PMD can sort into, based on packet/segment lengths. */
+	uint16_t max_npool;
+	enum rte_eth_rxseg_mode mode_flag; /**< Supported rxseg modes. */
 	uint16_t reserved; /**< Reserved field. */
 };
 
-- 
2.25.1



* RE: [PATCH v1 1/1] ethdev: introduce pool sort capability
  2022-08-12 10:46 [PATCH v1 1/1] ethdev: introduce pool sort capability Hanumanth Pothula
@ 2022-08-12 13:27 ` Morten Brørup
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
  1 sibling, 0 replies; 75+ messages in thread
From: Morten Brørup @ 2022-08-12 13:27 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram

> From: Hanumanth Pothula [mailto:hpothula@marvell.com]
> Sent: Friday, 12 August 2022 12.47
> 
> Presently, the 'Buffer Split' feature supports sending multiple
> segments of the received packet to the PMD, which programs the HW
> to receive the packet in segments from different pools.
> 
> This patch extends the feature to support the pool sort capability.
> Some HW has support for choosing memory pools based on the
> packet's size. The pool sort capability allows the PMD to choose a
> memory pool based on the packet's length.
> 
> This is often useful for saving memory: the application can create
> a different pool to steer each specific packet size to, thus
> enabling effective use of memory.
> 
> For example, let's say HW has a capability of three pools,
>  - pool-1 size is 2K
>  - pool-2 size is > 2K and < 4K
>  - pool-3 size is > 4K
> Here,
>         pool-1 can accommodate packets with sizes < 2K
>         pool-2 can accommodate packets with sizes > 2K and < 4K
>         pool-3 can accommodate packets with sizes > 4K
> 
> With the pool sort capability enabled in SW, an application may create
> three pools of different sizes and pass them to the PMD, allowing the
> PMD to program the HW based on packet lengths, so that packets smaller
> than 2K are received on pool-1, packets with lengths between 2K and 4K
> are received on pool-2, and packets larger than 4K are received on
> pool-3.
> 
> The following two capabilities are added to the rte_eth_rxseg_capa
> structure,
> 1. pool_sort --> indicates that the pool sort capability is supported by HW.
> 2. max_npool --> max number of pools supported by HW.
> 
> Defined a new structure, rte_eth_rxseg_sort, to be used only when the
> pool sort capability is present. If required, this may be extended
> further to support more configurations.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> Change-Id: I5a2485a7919616902c468c767b5c01834d4a2c27
> ---

I like the concept of a PMD being able to use different mbuf pools depending on packet size.

However, the "pool sort" feature is not an extension of the "buffer split" feature, but a separate feature. The API and documentation must reflect this.

Please also consider this when you implement it in the drivers: if no buffers are available in one of the pools, the next (larger) pool should be used instead of dropping the packet.

Here's another example use case: Assuming that 25 % of internet traffic is tiny packets (e.g. empty TCP ACK packets), a separate pool for those could be used.



* [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-08-12 10:46 [PATCH v1 1/1] ethdev: introduce pool sort capability Hanumanth Pothula
  2022-08-12 13:27 ` Morten Brørup
@ 2022-08-12 17:24 ` Hanumanth Pothula
  2022-08-12 17:24   ` [PATCH v2 2/3] app/testpmd: add command line argument 'rxseg-mode' Hanumanth Pothula
                     ` (3 more replies)
  1 sibling, 4 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-08-12 17:24 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

Presently, the 'Buffer Split' feature supports sending multiple
segments of the received packet to the PMD, which programs the HW
to receive the packet in segments from different pools.

This patch extends the feature to support the pool sort capability.
Some HW has support for choosing memory pools based on the
packet's size. The pool sort capability allows the PMD to choose a
memory pool based on the packet's length.

This is often useful for saving memory: the application can create
a different pool to steer each specific packet size to, thus
enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With the pool sort capability enabled in SW, an application may create
three pools of different sizes and pass them to the PMD, allowing the
PMD to program the HW based on packet lengths, so that packets smaller
than 2K are received on pool-1, packets with lengths between 2K and 4K
are received on pool-2, and packets larger than 4K are received on
pool-3.

The following two capabilities are added to the rte_eth_rxseg_capa
structure,
1. pool_sort --> indicates that the pool sort capability is supported by HW.
2. max_npool --> max number of pools supported by HW.

Defined a new structure, rte_eth_rxseg_sort, to be used only when the
pool sort capability is present. If required, this may be extended
further to support more configurations.
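
Compared to v1, the mode selection here moves out of each rte_eth_rxseg
element and into rte_eth_rxconf. A minimal sketch of the difference
(the pools below are illustrative assumptions):

	struct rte_eth_rxconf rx_conf = { 0 };
	struct rte_eth_rxseg rx_seg[2];

	memset(rx_seg, 0, sizeof(rx_seg));
	rx_seg[0].sort.mp = small_pool; /* assumed pool for short packets */
	rx_seg[0].sort.length = 2048;
	rx_seg[1].sort.mp = large_pool; /* assumed pool for long packets */
	rx_seg[1].sort.length = 8192;

	rx_conf.offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
	rx_conf.mode_flag = RTE_ETH_RXSEG_MODE_SORT; /* per-queue, not per-segment */
	rx_conf.rx_seg = rx_seg;
	rx_conf.rx_nseg = 2;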

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v2:
 - Along with spec changes, uploading testpmd and driver changes.
---
 lib/ethdev/rte_ethdev.c | 87 +++++++++++++++++++++++++++++++++++------
 lib/ethdev/rte_ethdev.h | 45 +++++++++++++++++++--
 2 files changed, 118 insertions(+), 14 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..7fd5443eb8 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1635,7 +1635,55 @@ rte_eth_dev_is_removed(uint16_t port_id)
 }
 
 static int
-rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
+rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg *rx_seg,
+			     uint16_t n_seg, uint32_t *mbp_buf_size,
+			     const struct rte_eth_dev_info *dev_info)
+{
+	const struct rte_eth_rxseg_capa *seg_capa = &dev_info->rx_seg_capa;
+	uint16_t seg_idx;
+
+	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid capabilities, multi_pools:%d different length segments %u exceed supported %u\n",
+			       seg_capa->multi_pools, n_seg, seg_capa->max_npool);
+		return -EINVAL;
+	}
+
+	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
+		struct rte_mempool *mpl = rx_seg[seg_idx].sort.mp;
+		uint32_t length = rx_seg[seg_idx].sort.length;
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
+			return -EINVAL;
+		}
+
+		if (mpl->private_data_size <
+			sizeof(struct rte_pktmbuf_pool_private)) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s private_data_size %u < %u\n",
+				       mpl->name, mpl->private_data_size,
+				       (unsigned int)sizeof
+					(struct rte_pktmbuf_pool_private));
+			return -ENOSPC;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
+		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u\n",
+				       mpl->name, *mbp_buf_size,
+				       length);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_eth_rx_queue_check_split(const struct rte_eth_rxseg *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
 			     const struct rte_eth_dev_info *dev_info)
 {
@@ -1654,12 +1702,12 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 	 * Check the sizes and offsets against buffer sizes
 	 * for each segment specified in extended configuration.
 	 */
-	mp_first = rx_seg[0].mp;
+	mp_first = rx_seg[0].split.mp;
 	offset_mask = RTE_BIT32(seg_capa->offset_align_log2) - 1;
 	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
-		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
-		uint32_t length = rx_seg[seg_idx].length;
-		uint32_t offset = rx_seg[seg_idx].offset;
+		struct rte_mempool *mpl = rx_seg[seg_idx].split.mp;
+		uint32_t length = rx_seg[seg_idx].split.length;
+		uint32_t offset = rx_seg[seg_idx].split.offset;
 
 		if (mpl == NULL) {
 			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
@@ -1693,7 +1741,11 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 		}
 		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
 		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
-		length = length != 0 ? length : *mbp_buf_size;
+		/* On segment length == 0, update the segment's length with
+		 * the pool's data room size minus the headroom, to make sure
+		 * enough space is accommodated for the headroom.
+		 */
+		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
 		if (*mbp_buf_size < length + offset) {
 			RTE_ETHDEV_LOG(ERR,
 				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
@@ -1764,7 +1816,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			return -EINVAL;
 		}
 	} else {
-		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
@@ -1774,13 +1825,27 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			return -EINVAL;
 		}
 
-		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
 		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
-			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
-							   &mbp_buf_size,
-							   &dev_info);
+			ret = -1; /* To make sure at least one of the conditions below becomes true */
+
+			/* Check that both the NIC and the application support the buffer-split capability */
+			if (dev_info.rx_seg_capa.mode_split &&
+			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SPLIT) {
+				ret = rte_eth_rx_queue_check_split(rx_conf->rx_seg, n_seg,
+								   &mbp_buf_size,
+								   &dev_info);
+			}
+
+			/* Check that both the NIC and the application support the pool-sort capability */
+			if (dev_info.rx_seg_capa.mode_sort &&
+			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SORT) {
+				ret = rte_eth_rx_queue_check_sort(rx_conf->rx_seg, n_seg,
+								  &mbp_buf_size,
+								  &dev_info);
+			}
+
 			if (ret != 0)
 				return ret;
 		} else {
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index de9e970d4d..9f6787d7ad 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1204,16 +1204,46 @@ struct rte_eth_rxseg_split {
 	uint32_t reserved; /**< Reserved field. */
 };
 
+/**
+ * The pool sort capability allows the PMD to choose a memory pool based on
+ * the packet's length. That is, the PMD programs the HW to receive packets
+ * from different pools, based on the packet's length.
+ *
+ * This is often useful for saving memory: the application can create a
+ * different pool to steer each specific packet size to, thus enabling
+ * effective use of memory.
+ */
+struct rte_eth_rxseg_sort {
+	struct rte_mempool *mp; /**< Memory pool to allocate packets from. */
+	uint16_t length; /**< Packet data length. */
+	uint32_t reserved; /**< Reserved field. */
+};
+
+enum rte_eth_rxseg_mode {
+	/**
+	 * Buffer split mode: PMD splits the received packets into multiple segments.
+	 * @see struct rte_eth_rxseg_split
+	 */
+	RTE_ETH_RXSEG_MODE_SPLIT = RTE_BIT64(0),
+	/**
+	 * Pool sort mode: PMD chooses a memory pool based on the packet's length.
+	 * @see struct rte_eth_rxseg_sort
+	 */
+	RTE_ETH_RXSEG_MODE_SORT  = RTE_BIT64(1),
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this structure may change without prior notice.
  *
  * A common structure used to describe Rx packet segment properties.
  */
-union rte_eth_rxseg {
+struct rte_eth_rxseg {
 	/* The settings for buffer split offload. */
 	struct rte_eth_rxseg_split split;
-	/* The other features settings should be added here. */
+
+	/* The settings for packet sort offload. */
+	struct rte_eth_rxseg_sort sort;
 };
 
 /**
@@ -1239,6 +1269,11 @@ struct rte_eth_rxconf {
 	 * fields on rte_eth_dev_info structure are allowed to be set.
 	 */
 	uint64_t offloads;
+	/**
+	 * PMD may support more than one rxseg mode. This allows the
+	 * application to choose which mode to enable.
+	 */
+	enum rte_eth_rxseg_mode mode_flag;
 	/**
 	 * Points to the array of segment descriptions for an entire packet.
 	 * Array elements are properties for consecutive Rx segments.
@@ -1246,7 +1281,7 @@ struct rte_eth_rxconf {
 	 * The supported capabilities of receiving segmentation is reported
 	 * in rte_eth_dev_info.rx_seg_capa field.
 	 */
-	union rte_eth_rxseg *rx_seg;
+	struct rte_eth_rxseg *rx_seg;
 
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
@@ -1827,10 +1862,14 @@ struct rte_eth_switch_info {
  */
 struct rte_eth_rxseg_capa {
 	__extension__
+	uint32_t mode_split:1; /**< Supports buffer split capability @see struct rte_eth_rxseg_split */
+	uint32_t mode_sort:1; /**< Supports pool sort capability @see struct rte_eth_rxseg_sort */
 	uint32_t multi_pools:1; /**< Supports receiving to multiple pools.*/
 	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
 	uint32_t offset_align_log2:4; /**< Required offset alignment. */
 	uint16_t max_nseg; /**< Maximum amount of segments to split. */
+	/** Maximum number of pools that the PMD can sort into, based on packet/segment lengths. */
+	uint16_t max_npool;
 	uint16_t reserved; /**< Reserved field. */
 };
 
-- 
2.25.1



* [PATCH v2 2/3] app/testpmd: add command line argument 'rxseg-mode'
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
@ 2022-08-12 17:24   ` Hanumanth Pothula
  2022-08-12 17:24   ` [PATCH v2 3/3] net/cnxk: introduce pool sort capability Hanumanth Pothula
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-08-12 17:24 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram, Hanumanth Pothula

With the 'rxseg-mode' command line argument, the application can
choose either the buffer split or the pool sort capability. This
might be helpful if the HW has support for both capabilities and the
application wants to enable only one of them.

By default, the buffer-split capability is enabled. To enable the
pool-sort capability, pass the command line argument '--rxseg-mode=2'.
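
For example, an illustrative invocation exercising the pool-sort path
with two mbuf pools (the EAL core/memory options and pool sizes below
are assumptions; it relies on testpmd's existing --mbuf-size and
--rxpkts options to supply the per-pool lengths, and the port must also
have the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT Rx offload enabled, since the
generic layer gates both modes behind that offload):

  dpdk-testpmd -l 0-3 -n 4 -- -i \
      --mbuf-size=2048,8192 --rxpkts=2048,8192 --rxseg-mode=2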

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/parameters.c | 16 ++++++++++++++++
 app/test-pmd/testpmd.c    | 35 ++++++++++++++++++++++++++---------
 app/test-pmd/testpmd.h    |  2 ++
 app/test-pmd/util.c       |  4 ++--
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index e3c9757f3f..c3876a9b5f 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -213,6 +213,9 @@ usage(char* progname)
 	printf("  --hairpin-mode=0xXX: bitmask set the hairpin port mode.\n"
 	       "    0x10 - explicit Tx rule, 0x02 - hairpin ports paired\n"
 	       "    0x01 - hairpin ports loop, 0x00 - hairpin port self\n");
+	printf(" --rxseg-mode: provide rxseg capability\n"
+	       "    1 - Buffer-split capability\n"
+	       "    2 - Pool-sort capability\n");
 }
 
 #ifdef RTE_LIB_CMDLINE
@@ -710,6 +713,7 @@ launch_args_parse(int argc, char** argv)
 		{ "record-burst-stats",         0, 0, 0 },
 		{ PARAM_NUM_PROCS,              1, 0, 0 },
 		{ PARAM_PROC_ID,                1, 0, 0 },
+		{ "rxseg-mode",                 1, 0, 0 },
 		{ 0, 0, 0, 0 },
 	};
 
@@ -1510,6 +1514,18 @@ launch_args_parse(int argc, char** argv)
 				num_procs = atoi(optarg);
 			if (!strcmp(lgopts[opt_idx].name, PARAM_PROC_ID))
 				proc_id = atoi(optarg);
+			if (!strcmp(lgopts[opt_idx].name, "rxseg-mode")) {
+				char *end = NULL;
+				unsigned int n;
+
+				errno = 0;
+				n = strtoul(optarg, &end, 0);
+				if (errno != 0 || end == optarg ||
+				    n < RTE_ETH_RXSEG_MODE_SPLIT || n > RTE_ETH_RXSEG_MODE_SORT)
+					rte_exit(EXIT_FAILURE, "invalid rxseg mode\n");
+				else
+					rxseg_mode = (uint8_t)n;
+			}
 			break;
 		case 'h':
 			usage(argv[0]);
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index addcbcac85..b5b4fcd66e 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -411,6 +411,9 @@ uint8_t clear_ptypes = true;
 /* Hairpin ports configuration mode. */
 uint16_t hairpin_mode;
 
+/* Selected Rxseg mode */
+uint8_t rxseg_mode = RTE_ETH_RXSEG_MODE_SPLIT;
+
 /* Pretty printing of ethdev events */
 static const char * const eth_event_desc[] = {
 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
@@ -2656,7 +2659,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       uint16_t nb_rx_desc, unsigned int socket_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
-	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
 	unsigned int i, mp_n;
 	int ret;
 
@@ -2670,24 +2673,38 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		goto exit;
 	}
 	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+		struct rte_eth_rxseg_split *rx_split = &rx_useg[i].split;
+		struct rte_eth_rxseg_sort  *rx_sort = &rx_useg[i].sort;
 		struct rte_mempool *mpx;
+
 		/*
 		 * Use last valid pool for the segments with number
 		 * exceeding the pool index.
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
+		if (rxseg_mode == RTE_ETH_RXSEG_MODE_SPLIT) {
+			/*
+			 * On segment length zero, update the length as
+			 *      buffer size - headroom size
+			 * to make sure enough space is accommodated for
+			 * the headroom.
+			 */
+			rx_split->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_split->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_split->mp = mpx ? mpx : mp;
+		} else if (rxseg_mode == RTE_ETH_RXSEG_MODE_SORT) {
+			rx_sort->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_sort->mp = mpx ? mpx : mp;
+		}
 	}
 	rx_conf->rx_nseg = rx_pkt_nb_segs;
 	rx_conf->rx_seg = rx_useg;
+	rx_conf->mode_flag = (enum rte_eth_rxseg_mode)rxseg_mode;
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index fb2f5195d3..0dcb3abf01 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -621,6 +621,8 @@ extern struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
 extern uint32_t burst_tx_delay_time; /**< Burst tx delay time(us) for mac-retry. */
 extern uint32_t burst_tx_retry_num;  /**< Burst tx retry number for mac-retry. */
 
+extern uint8_t rxseg_mode;
+
 #ifdef RTE_LIB_GRO
 #define GRO_DEFAULT_ITEM_NUM_PER_FLOW 32
 #define GRO_DEFAULT_FLOW_NUM (RTE_GRO_MAX_BURST_ITEM_NUM / \
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1



* [PATCH v2 3/3] net/cnxk: introduce pool sort capability
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
  2022-08-12 17:24   ` [PATCH v2 2/3] app/testpmd: add command line argument 'rxseg-mode' Hanumanth Pothula
@ 2022-08-12 17:24   ` Hanumanth Pothula
  2022-08-23  3:26   ` [PATCH v2 1/3] ethdev: " Ding, Xuan
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-08-12 17:24 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, Hanumanth Pothula

Presently, the HW is programmed to receive packets only from the LPB
pool, so all packets are received from the LPB pool.

But, CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the pool sort capability: the pool is selected
based on the packet's length. So, basically, the PMD programs the HW
to receive packets from both the SPB and LPB pools based on the
packet's length.

This is achieved by enabling the Rx buffer split offload,
RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT. This allows the application to pass
more than one pool (in our case, two) to the driver, with different
segment (packet) lengths, which helps the driver to configure both
pools based on segment lengths.

This is often useful for saving memory: the application can create a
different pool to steer each specific packet size to, thus enabling
effective use of memory.
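
For illustration, a minimal sketch of how an application might hand
this driver its two pools (the pool names and sizes are assumptions;
the driver requires exactly two rx_seg entries, and each sort.length
must be smaller than its pool's element size):

	struct rte_eth_rxconf rx_conf = { 0 };
	struct rte_eth_rxseg rx_seg[2];

	memset(rx_seg, 0, sizeof(rx_seg));
	rx_seg[0].sort.mp = spb_pool;  /* assumed pool, elt_size ~4K  */
	rx_seg[0].sort.length = 2048;  /* smaller packets -> SPB      */
	rx_seg[1].sort.mp = lpb_pool;  /* assumed pool, elt_size ~16K */
	rx_seg[1].sort.length = 9200;  /* bigger packets -> LPB       */

	rx_conf.offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
	rx_conf.mode_flag = RTE_ETH_RXSEG_MODE_SORT;
	rx_conf.rx_seg = rx_seg;
	rx_conf.rx_nseg = 2;

The driver picks the pool with the larger segment length as the LPB
pool automatically, so the order of the two entries does not matter.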

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 doc/guides/nics/features/cnxk.ini     |  1 +
 doc/guides/nics/features/cnxk_vec.ini |  1 +
 drivers/net/cnxk/cnxk_ethdev.c        | 93 ++++++++++++++++++++++++---
 drivers/net/cnxk/cnxk_ethdev.h        |  4 +-
 drivers/net/cnxk/cnxk_ethdev_ops.c    |  7 ++
 5 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini
index 1876fe86c7..e1584ed740 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+pool sort	     = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini b/doc/guides/nics/features/cnxk_vec.ini
index 5d0976e6ce..a63d35aae7 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+pool sort	     = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 24182909f1..6bf04dde96 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -537,6 +537,64 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool, struct rte_mempool **spb_pool,
+			 uint16_t *lpb_len, uint16_t *spb_len)
+{
+	struct rte_eth_rxseg_sort rx_seg0;
+	struct rte_eth_rxseg_sort rx_seg1;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool || !rx_conf->rx_seg || rx_conf->rx_nseg != CNXK_NIX_NUM_POOLS_MAX ||
+	    !rx_conf->rx_seg[0].sort.mp || !rx_conf->rx_seg[1].sort.mp) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	rx_seg0 = rx_conf->rx_seg[0].sort;
+	rx_seg1 = rx_conf->rx_seg[1].sort;
+
+	if (rx_seg0.length >= rx_seg0.mp->elt_size || rx_seg1.length >= rx_seg1.mp->elt_size) {
+		plt_err("mismatch in packet length & pool length seg0_len:%u pool0_len:%u "
+			"seg1_len:%u pool1_len:%u", rx_seg0.length, rx_seg0.mp->elt_size,
+			rx_seg1.length, rx_seg1.mp->elt_size);
+		return -EINVAL;
+	}
+
+	if (rx_seg0.length > rx_seg1.length) {
+		*lpb_pool = rx_seg0.mp;
+		*spb_pool = rx_seg1.mp;
+
+		*lpb_len = rx_seg0.length;
+		*spb_len = rx_seg1.length;
+	} else {
+		*lpb_pool = rx_seg1.mp;
+		*spb_pool = rx_seg0.mp;
+
+		*lpb_len = rx_seg1.length;
+		*spb_len = rx_seg0.length;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, *lpb_len, *spb_len);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -553,6 +611,10 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	uint16_t lpb_len = 0;
+	uint16_t spb_len = 0;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -560,15 +622,22 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool,
+					      &lpb_len, &spb_len);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -585,13 +654,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -606,23 +675,29 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Setup ROC RQ */
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
 	rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
+	rq->lpb_size = lpb_len ? lpb_len : lpb_pool->elt_size;
 
 	/* Enable Inline IPSec on RQ, will not be used for Poll mode */
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_len;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -645,7 +720,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -664,7 +739,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4cb7c9e90c..d60515d50a 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
@@ -83,7 +85,7 @@
 	 RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_SCATTER |    \
 	 RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_RSS_HASH |    \
 	 RTE_ETH_RX_OFFLOAD_TIMESTAMP | RTE_ETH_RX_OFFLOAD_VLAN_STRIP |        \
-	 RTE_ETH_RX_OFFLOAD_SECURITY)
+	 RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT | RTE_ETH_RX_OFFLOAD_SECURITY)
 
 #define RSS_IPV4_ENABLE                                                        \
 	(RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |                            \
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 1592971073..6174a586be 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,13 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->rx_seg_capa = (struct rte_eth_rxseg_capa){
+		.mode_sort = 1,
+		.multi_pools = 1,
+		.max_npool = CNXK_NIX_NUM_POOLS_MAX,
+	};
+
 	return 0;
 }
 
-- 
2.25.1



* RE: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
  2022-08-12 17:24   ` [PATCH v2 2/3] app/testpmd: add command line argument 'rxseg-mode' Hanumanth Pothula
  2022-08-12 17:24   ` [PATCH v2 3/3] net/cnxk: introduce pool sort capability Hanumanth Pothula
@ 2022-08-23  3:26   ` Ding, Xuan
  2022-08-24 15:33     ` Ferruh Yigit
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
  3 siblings, 1 reply; 75+ messages in thread
From: Ding, Xuan @ 2022-08-23  3:26 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, jerinj, ndabilpuram

Hi Hanumanth,

> -----Original Message-----
> From: Hanumanth Pothula <hpothula@marvell.com>
> Sent: Saturday, August 13, 2022 1:25 AM
> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; Ding, Xuan <xuan.ding@intel.com>; Wu, WenxuanX
> <wenxuanx.wu@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>;
> stephen@networkplumber.org; Wang, YuanX <yuanx.wang@intel.com>;
> mdr@ashroe.eu; Zhang, Yuying <yuying.zhang@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; viacheslavo@nvidia.com; jerinj@marvell.com;
> ndabilpuram@marvell.com; Hanumanth Pothula <hpothula@marvell.com>
> Subject: [PATCH v2 1/3] ethdev: introduce pool sort capability
> 
> Presently, the 'Buffer Split' feature supports sending multiple segments of
> the received packet to the PMD, which programs the HW to receive the
> packet in segments from different pools.
> 
> This patch extends the feature to support the pool sort capability.
> Some HW has support for choosing memory pools based on the
> packet's size. The pool sort capability allows the PMD to choose a
> memory pool based on the packet's length.
> 
> This is often useful for saving memory: the application can create a
> different pool to steer each specific packet size to, thus enabling
> effective use of memory.
> 
> For example, let's say HW has a capability of three pools,
>  - pool-1 size is 2K
>  - pool-2 size is > 2K and < 4K
>  - pool-3 size is > 4K
> Here,
>         pool-1 can accommodate packets with sizes < 2K
>         pool-2 can accommodate packets with sizes > 2K and < 4K
>         pool-3 can accommodate packets with sizes > 4K
> 
> With the pool sort capability enabled in SW, an application may create
> three pools of different sizes and pass them to the PMD, allowing the PMD
> to program the HW based on packet lengths, so that packets smaller than 2K
> are received on pool-1, packets with lengths between 2K and 4K are received
> on pool-2, and packets larger than 4K are received on pool-3.
> 
> The following two capabilities are added to the rte_eth_rxseg_capa structure,
> 1. pool_sort --> indicates that the pool sort capability is supported by HW.
> 2. max_npool --> max number of pools supported by HW.
> 
> Defined a new structure, rte_eth_rxseg_sort, to be used only when the pool
> sort capability is present. If required, this may be extended further to
> support more configurations.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 
> v2:
>  - Along with spec changes, uploading testpmd and driver changes.

Thanks for CCing. It's an interesting feature.

But I have one question here:
Buffer split is for splitting received packets into multiple segments, while pool sort
allows the PMD to put received packets into different pools according to packet size.
Every packet is still intact.

So, at this level, pool sort does not belong to buffer split.
And you already use a different function to check pool sort rather than buffer split.

Should a new Rx offload be introduced, like "RTE_ETH_RX_OFFLOAD_POOL_SORT"?

> ---
>  lib/ethdev/rte_ethdev.c | 87 +++++++++++++++++++++++++++++++++++------
>  lib/ethdev/rte_ethdev.h | 45 +++++++++++++++++++--
>  2 files changed, 118 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> 1979dc0850..7fd5443eb8 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1635,7 +1635,55 @@ rte_eth_dev_is_removed(uint16_t port_id)  }
> 
>  static int
> -rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
> +rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg *rx_seg,
> +			     uint16_t n_seg, uint32_t *mbp_buf_size,
> +			     const struct rte_eth_dev_info *dev_info) {
> +	const struct rte_eth_rxseg_capa *seg_capa = &dev_info-
> >rx_seg_capa;
> +	uint16_t seg_idx;
> +
> +	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid capabilities, multi_pools:%d different
> length segments %u exceed supported %u\n",
> +			       seg_capa->multi_pools, n_seg, seg_capa-
> >max_npool);
> +		return -EINVAL;
> +	}
> +
> +	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
> +		struct rte_mempool *mpl = rx_seg[seg_idx].sort.mp;
> +		uint32_t length = rx_seg[seg_idx].sort.length;
> +
> +		if (mpl == NULL) {
> +			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
> +			return -EINVAL;
> +		}
> +
> +		if (mpl->private_data_size <
> +			sizeof(struct rte_pktmbuf_pool_private)) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "%s private_data_size %u < %u\n",
> +				       mpl->name, mpl->private_data_size,
> +				       (unsigned int)sizeof
> +					(struct rte_pktmbuf_pool_private));
> +			return -ENOSPC;
> +		}
> +
> +		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> +		length = length != 0 ? length : (*mbp_buf_size -
> RTE_PKTMBUF_HEADROOM);
> +		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "%s mbuf_data_room_size %u < %u\n",
> +				       mpl->name, *mbp_buf_size,
> +				       length);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +rte_eth_rx_queue_check_split(const struct rte_eth_rxseg *rx_seg,
>  			     uint16_t n_seg, uint32_t *mbp_buf_size,
>  			     const struct rte_eth_dev_info *dev_info)  { @@ -
> 1654,12 +1702,12 @@ rte_eth_rx_queue_check_split(const struct
> rte_eth_rxseg_split *rx_seg,
>  	 * Check the sizes and offsets against buffer sizes
>  	 * for each segment specified in extended configuration.
>  	 */
> -	mp_first = rx_seg[0].mp;
> +	mp_first = rx_seg[0].split.mp;
>  	offset_mask = RTE_BIT32(seg_capa->offset_align_log2) - 1;
>  	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
> -		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
> -		uint32_t length = rx_seg[seg_idx].length;
> -		uint32_t offset = rx_seg[seg_idx].offset;
> +		struct rte_mempool *mpl = rx_seg[seg_idx].split.mp;
> +		uint32_t length = rx_seg[seg_idx].split.length;
> +		uint32_t offset = rx_seg[seg_idx].split.offset;
> 
>  		if (mpl == NULL) {
>  			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
> @@ -1693,7 +1741,11 @@ rte_eth_rx_queue_check_split(const struct
> rte_eth_rxseg_split *rx_seg,
>  		}
>  		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
>  		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> -		length = length != 0 ? length : *mbp_buf_size;
> +		/* On segment length == 0, update the segment's length with
> +		 * the pool's data room size minus the headroom, to make sure
> +		 * enough space is accommodated for the headroom.
> +		 */
> +		length = length != 0 ? length : (*mbp_buf_size -
> +RTE_PKTMBUF_HEADROOM);
>  		if (*mbp_buf_size < length + offset) {
>  			RTE_ETHDEV_LOG(ERR,
>  				       "%s mbuf_data_room_size %u < %u
> (segment length=%u + segment offset=%u)\n", @@ -1764,7 +1816,6 @@
> rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>  			return -EINVAL;
>  		}
>  	} else {
> -		const struct rte_eth_rxseg_split *rx_seg;
>  		uint16_t n_seg;
> 
>  		/* Extended multi-segment configuration check. */ @@ -
> 1774,13 +1825,27 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
>  			return -EINVAL;
>  		}
> 
> -		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
>  		n_seg = rx_conf->rx_nseg;
> 
>  		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)
> {
> -			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
> -							   &mbp_buf_size,
> -							   &dev_info);
> +			ret = -1; /* To make sure at least one of below
> conditions becomes
> +true */
> +
> +			/* Check that both the NIC and application support buffer-
> split capability */
> +			if (dev_info.rx_seg_capa.mode_split &&
> +			    rx_conf->mode_flag ==
> RTE_ETH_RXSEG_MODE_SPLIT) {
> +				ret = rte_eth_rx_queue_check_split(rx_conf-
> >rx_seg, n_seg,
> +
> &mbp_buf_size,
> +								   &dev_info);
> +			}
> +
> +			/* Check that both the NIC and application support pool-sort
> capability */
> +			if (dev_info.rx_seg_capa.mode_sort &&
> +			    rx_conf->mode_flag ==
> RTE_ETH_RXSEG_MODE_SORT) {
> +				ret = rte_eth_rx_queue_check_sort(rx_conf-
> >rx_seg, n_seg,
> +
> &mbp_buf_size,
> +								  &dev_info);
> +			}
> +
>  			if (ret != 0)
>  				return ret;
>  		} else {
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> de9e970d4d..9f6787d7ad 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1204,16 +1204,46 @@ struct rte_eth_rxseg_split {
>  	uint32_t reserved; /**< Reserved field. */  };
> 
> +/**
> + * The pool sort capability allows PMD to choose a memory pool based on
> +the
> + * packet's length. So, basically, PMD programs HW for receiving
> +packets from
> + * different pools, based on the packet's length.
> + *
> + * This is often useful for saving the memory where the application can
> +create
> + * a different pool to steer the specific size of the packet, thus
> +enabling
> + * effective use of memory.
> + */
> +struct rte_eth_rxseg_sort {
> +	struct rte_mempool *mp; /**< Memory pool to allocate packets
> from. */
> +	uint16_t length; /**< Packet data length. */
> +	uint32_t reserved; /**< Reserved field. */ };
> +
> +enum rte_eth_rxseg_mode {
> +	/**
> +	 * Buffer split mode: PMD splits the received packets into multiple
> segments.
> +	 * @see struct rte_eth_rxseg_split
> +	 */
> +	RTE_ETH_RXSEG_MODE_SPLIT = RTE_BIT64(0),
> +	/**
> +	 * Pool sort mode: PMD chooses a memory pool based on the
> packet's length.
> +	 * @see struct rte_eth_rxseg_sort
> +	 */
> +	RTE_ETH_RXSEG_MODE_SORT  = RTE_BIT64(1), };
> +
>  /**
>   * @warning
>   * @b EXPERIMENTAL: this structure may change without prior notice.
>   *
>   * A common structure used to describe Rx packet segment properties.
>   */
> -union rte_eth_rxseg {
> +struct rte_eth_rxseg {
>  	/* The settings for buffer split offload. */
>  	struct rte_eth_rxseg_split split;
> -	/* The other features settings should be added here. */
> +
> +	/* The settings for packet sort offload. */
> +	struct rte_eth_rxseg_sort sort;
>  };
> 
>  /**
> @@ -1239,6 +1269,11 @@ struct rte_eth_rxconf {
>  	 * fields on rte_eth_dev_info structure are allowed to be set.
>  	 */
>  	uint64_t offloads;
> +	/**
> +	 * PMD may support more than one rxseg mode. This allows
> application
> +	 * to choose which mode to enable.
> +	 */
> +	enum rte_eth_rxseg_mode mode_flag;
>  	/**
>  	 * Points to the array of segment descriptions for an entire packet.
>  	 * Array elements are properties for consecutive Rx segments.
> @@ -1246,7 +1281,7 @@ struct rte_eth_rxconf {
>  	 * The supported capabilities of receiving segmentation is reported
>  	 * in rte_eth_dev_info.rx_seg_capa field.
>  	 */
> -	union rte_eth_rxseg *rx_seg;
> +	struct rte_eth_rxseg *rx_seg;
> 
>  	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
> @@ -1827,10 +1862,14 @@ struct rte_eth_switch_info {
>   */
>  struct rte_eth_rxseg_capa {
>  	__extension__
> +	uint32_t mode_split : 1; /**< Supports buffer split capability @see
> struct rte_eth_rxseg_split */
> +	uint32_t mode_sort : 1; /**< Supports pool sort capability @see
> struct

The same doubt here. As far as I know, the 'rte_eth_rxseg_capa' structure is used for buffer split.

Thanks,
Xuan

> +rte_eth_rxseg_sort */
>  	uint32_t multi_pools:1; /**< Supports receiving to multiple pools.*/
>  	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
>  	uint32_t offset_align_log2:4; /**< Required offset alignment. */
>  	uint16_t max_nseg; /**< Maximum amount of segments to split. */
> +	/** Maximum number of pools that the PMD can sort into, based on
> packet/segment lengths. */
> +	uint16_t max_npool;
>  	uint16_t reserved; /**< Reserved field. */  };
> 
> --
> 2.25.1



* Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-08-23  3:26   ` [PATCH v2 1/3] ethdev: " Ding, Xuan
@ 2022-08-24 15:33     ` Ferruh Yigit
  2022-08-30 12:08       ` [EXT] " Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Ferruh Yigit @ 2022-08-24 15:33 UTC (permalink / raw)
  To: Ding, Xuan, Hanumanth Pothula, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, jerinj, ndabilpuram

On 8/23/2022 4:26 AM, Ding, Xuan wrote:
> Hi Hanumanth,
> 
>> -----Original Message-----
>> From: Hanumanth Pothula <hpothula@marvell.com>
>> Sent: Saturday, August 13, 2022 1:25 AM
>> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
>> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
>> <andrew.rybchenko@oktetlabs.ru>
>> Cc: dev@dpdk.org; Ding, Xuan <xuan.ding@intel.com>; Wu, WenxuanX
>> <wenxuanx.wu@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>;
>> stephen@networkplumber.org; Wang, YuanX <yuanx.wang@intel.com>;
>> mdr@ashroe.eu; Zhang, Yuying <yuying.zhang@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; viacheslavo@nvidia.com; jerinj@marvell.com;
>> ndabilpuram@marvell.com; Hanumanth Pothula <hpothula@marvell.com>
>> Subject: [PATCH v2 1/3] ethdev: introduce pool sort capability
>>
>> Presently, the 'Buffer Split' feature supports sending multiple segments of
>> the received packet to the PMD, which programs the HW to receive the
>> packet in segments from different pools.
>>
>> This patch extends the feature to support the pool sort capability.
>> Some HW has support for choosing memory pools based on the
>> packet's size. The pool sort capability allows the PMD to choose a
>> memory pool based on the packet's length.
>>
>> This is often useful for saving memory: the application can create a
>> different pool to steer each specific packet size to, thus enabling
>> effective use of memory.
>>
>> For example, let's say HW has a capability of three pools,
>>   - pool-1 size is 2K
>>   - pool-2 size is > 2K and < 4K
>>   - pool-3 size is > 4K
>> Here,
>>          pool-1 can accommodate packets with sizes < 2K
>>          pool-2 can accommodate packets with sizes > 2K and < 4K
>>          pool-3 can accommodate packets with sizes > 4K
>>
>> With the pool sort capability enabled in SW, an application may create
>> three pools of different sizes and pass them to the PMD, allowing the PMD
>> to program the HW based on packet lengths, so that packets smaller than 2K
>> are received on pool-1, packets with lengths between 2K and 4K are received
>> on pool-2, and packets larger than 4K are received on pool-3.
>>
>> The following two capabilities are added to the rte_eth_rxseg_capa structure,
>> 1. pool_sort --> indicates that the pool sort capability is supported by HW.
>> 2. max_npool --> max number of pools supported by HW.
>>
>> Defined a new structure, rte_eth_rxseg_sort, to be used only when the pool
>> sort capability is present. If required, this may be extended further to
>> support more configurations.
>>
>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>>
>> v2:
>>   - Along with spec changes, uploading testpmd and driver changes.
> 
> Thanks for CCing. It's an interesting feature.
> 
> But I have one question here:
> Buffer split is for split receiving packets into multiple segments, while pool sort supports
> PMD to put the receiving packets into different pools according to packet size.
> Every packet is still intact.
> 
> So, at this level, pool sort does not belong to buffer split.
> And you already use a different function to check pool sort rather than check buffer split.
> 
> Should a new RX offload be introduced? like "RTE_ETH_RX_OFFLOAD_POOL_SORT".
> 

Hi Hanumanth,

I had a similar concern with the feature. I assume you want to benefit
from the existing config structure that takes multiple mempools as
argument, since this feature also needs multiple mempools, but the
feature is different.

It looks wrong to me to check the 'OFFLOAD_BUFFER_SPLIT' offload to
decide whether to receive into multiple mempools or not, since that
offload doesn't have anything to do with splitting. Also not sure about
using the 'sort' keyword.
What do you think about introducing a new feature, instead of extending
the existing split one?
This is an optimisation, right? As it enables us to use less memory for
the packet buffers, does it qualify as a device offload?


Also, what is the relation with segmented Rx? How does a PMD decide to
use segmented Rx or a bigger mempool? How can an application configure this?

Need to clarify the rules, based on your sample, if a 512 bytes packet 
received, does it have to go pool-1, or can it go to any of three pools?


And I don't see any change in the 'net/cnxk' Rx burst code. When 
multiple mempools are used, shouldn't it check which mempool is filled 
while filling the mbufs? How does this work without an update in the Rx 
burst code, or am I missing some implementation detail?


>> ---
>>   lib/ethdev/rte_ethdev.c | 87 +++++++++++++++++++++++++++++++++++------
>>   lib/ethdev/rte_ethdev.h | 45 +++++++++++++++++++--
>>   2 files changed, 118 insertions(+), 14 deletions(-)
>>
>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
>> 1979dc0850..7fd5443eb8 100644
>> --- a/lib/ethdev/rte_ethdev.c
>> +++ b/lib/ethdev/rte_ethdev.c
>> @@ -1635,7 +1635,55 @@ rte_eth_dev_is_removed(uint16_t port_id)  }
>>
>>   static int
>> -rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>> +rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg *rx_seg,
>> +			     uint16_t n_seg, uint32_t *mbp_buf_size,
>> +			     const struct rte_eth_dev_info *dev_info) {
>> +	const struct rte_eth_rxseg_capa *seg_capa = &dev_info->rx_seg_capa;
>> +	uint16_t seg_idx;
>> +
>> +	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
>> +		RTE_ETHDEV_LOG(ERR,
>> +			       "Invalid capabilities, multi_pools:%d different length segments %u exceed supported %u\n",
>> +			       seg_capa->multi_pools, n_seg, seg_capa->max_nseg);
>> +		return -EINVAL;
>> +	}
>> +
>> +	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
>> +		struct rte_mempool *mpl = rx_seg[seg_idx].sort.mp;
>> +		uint32_t length = rx_seg[seg_idx].sort.length;
>> +
>> +		if (mpl == NULL) {
>> +			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
>> +			return -EINVAL;
>> +		}
>> +
>> +		if (mpl->private_data_size <
>> +			sizeof(struct rte_pktmbuf_pool_private)) {
>> +			RTE_ETHDEV_LOG(ERR,
>> +				       "%s private_data_size %u < %u\n",
>> +				       mpl->name, mpl->private_data_size,
>> +				       (unsigned int)sizeof
>> +					(struct rte_pktmbuf_pool_private));
>> +			return -ENOSPC;
>> +		}
>> +
>> +		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
>> +		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
>> +		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
>> +			RTE_ETHDEV_LOG(ERR,
>> +				       "%s mbuf_data_room_size %u < %u))\n",
>> +				       mpl->name, *mbp_buf_size,
>> +				       length);
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +rte_eth_rx_queue_check_split(const struct rte_eth_rxseg *rx_seg,
>>   			     uint16_t n_seg, uint32_t *mbp_buf_size,
>>   			     const struct rte_eth_dev_info *dev_info)  {
>> @@ -1654,12 +1702,12 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>>   	 * Check the sizes and offsets against buffer sizes
>>   	 * for each segment specified in extended configuration.
>>   	 */
>> -	mp_first = rx_seg[0].mp;
>> +	mp_first = rx_seg[0].split.mp;
>>   	offset_mask = RTE_BIT32(seg_capa->offset_align_log2) - 1;
>>   	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
>> -		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
>> -		uint32_t length = rx_seg[seg_idx].length;
>> -		uint32_t offset = rx_seg[seg_idx].offset;
>> +		struct rte_mempool *mpl = rx_seg[seg_idx].split.mp;
>> +		uint32_t length = rx_seg[seg_idx].split.length;
>> +		uint32_t offset = rx_seg[seg_idx].split.offset;
>>
>>   		if (mpl == NULL) {
>>   			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
>> @@ -1693,7 +1741,11 @@ rte_eth_rx_queue_check_split(const struct
>> rte_eth_rxseg_split *rx_seg,
>>   		}
>>   		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
>>   		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
>> -		length = length != 0 ? length : *mbp_buf_size;
>> +		/* On segment length == 0, update the segment's length with
>> +		 * the pool's length - headroom space, to make sure enough
>> +		 * space is accommodated for the header.
>> +		 */
>> +		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
>>   		if (*mbp_buf_size < length + offset) {
>>   			RTE_ETHDEV_LOG(ERR,
>>   				       "%s mbuf_data_room_size %u < %u
>> (segment length=%u + segment offset=%u)\n", @@ -1764,7 +1816,6 @@
>> rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>   			return -EINVAL;
>>   		}
>>   	} else {
>> -		const struct rte_eth_rxseg_split *rx_seg;
>>   		uint16_t n_seg;
>>
>>   		/* Extended multi-segment configuration check. */
>> @@ -1774,13 +1825,27 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>   			return -EINVAL;
>>   		}
>>
>> -		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
>>   		n_seg = rx_conf->rx_nseg;
>>
>>   		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>> -			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
>> -							   &mbp_buf_size,
>> -							   &dev_info);
>> +			ret = -1; /* To make sure at least one of the below conditions becomes true */
>> +
>> +			/* Check both NIX and application support buffer-split capability */
>> +			if (dev_info.rx_seg_capa.mode_split &&
>> +			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SPLIT) {
>> +				ret = rte_eth_rx_queue_check_split(rx_conf->rx_seg, n_seg,
>> +								   &mbp_buf_size,
>> +								   &dev_info);
>> +			}
>> +
>> +			/* Check both NIX and application support pool-sort capability */
>> +			if (dev_info.rx_seg_capa.mode_sort &&
>> +			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SORT) {
>> +				ret = rte_eth_rx_queue_check_sort(rx_conf->rx_seg, n_seg,
>> +								  &mbp_buf_size,
>> +								  &dev_info);
>> +			}
>> +
>>   			if (ret != 0)
>>   				return ret;
>>   		} else {
>> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
>> index de9e970d4d..9f6787d7ad 100644
>> --- a/lib/ethdev/rte_ethdev.h
>> +++ b/lib/ethdev/rte_ethdev.h
>> @@ -1204,16 +1204,46 @@ struct rte_eth_rxseg_split {
>>   	uint32_t reserved; /**< Reserved field. */  };
>>
>> +/**
>> + * The pool sort capability allows PMD to choose a memory pool based on the
>> + * packet's length. So, basically, PMD programs HW for receiving packets from
>> + * different pools, based on the packet's length.
>> + *
>> + * This is often useful for saving the memory where the application can create
>> + * a different pool to steer the specific size of the packet, thus enabling
>> + * effective use of memory.
>> + */
>> +struct rte_eth_rxseg_sort {
>> +	struct rte_mempool *mp; /**< Memory pool to allocate packets from. */
>> +	uint16_t length; /**< Packet data length. */
>> +	uint32_t reserved; /**< Reserved field. */
>> +};
>> +
>> +enum rte_eth_rxseg_mode {
>> +	/**
>> +	 * Buffer split mode: PMD splits the received packets into multiple segments.
>> +	 * @see struct rte_eth_rxseg_split
>> +	 */
>> +	RTE_ETH_RXSEG_MODE_SPLIT = RTE_BIT64(0),
>> +	/**
>> +	 * Pool sort mode: PMD chooses a memory pool based on the packet's length.
>> +	 * @see struct rte_eth_rxseg_sort
>> +	 */
>> +	RTE_ETH_RXSEG_MODE_SORT  = RTE_BIT64(1),
>> +};
>> +
>>   /**
>>    * @warning
>>    * @b EXPERIMENTAL: this structure may change without prior notice.
>>    *
>>    * A common structure used to describe Rx packet segment properties.
>>    */
>> -union rte_eth_rxseg {
>> +struct rte_eth_rxseg {
>>   	/* The settings for buffer split offload. */
>>   	struct rte_eth_rxseg_split split;
>> -	/* The other features settings should be added here. */
>> +
>> +	/* The settings for packet sort offload. */
>> +	struct rte_eth_rxseg_sort sort;
>>   };
>>
>>   /**
>> @@ -1239,6 +1269,11 @@ struct rte_eth_rxconf {
>>   	 * fields on rte_eth_dev_info structure are allowed to be set.
>>   	 */
>>   	uint64_t offloads;
>> +	/**
>> +	 * PMD may support more than one rxseg mode. This allows the application
>> +	 * to choose which mode to enable.
>> +	 */
>> +	enum rte_eth_rxseg_mode mode_flag;
>>   	/**
>>   	 * Points to the array of segment descriptions for an entire packet.
>>   	 * Array elements are properties for consecutive Rx segments.
>> @@ -1246,7 +1281,7 @@ struct rte_eth_rxconf {
>>   	 * The supported capabilities of receiving segmentation is reported
>>   	 * in rte_eth_dev_info.rx_seg_capa field.
>>   	 */
>> -	union rte_eth_rxseg *rx_seg;
>> +	struct rte_eth_rxseg *rx_seg;
>>
>>   	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
>> @@ -1827,10 +1862,14 @@ struct rte_eth_switch_info {
>>    */
>>   struct rte_eth_rxseg_capa {
>>   	__extension__
>> +	uint32_t mode_split : 1; /**< Supports buffer split capability @see struct rte_eth_rxseg_split */
>> +	uint32_t mode_sort : 1; /**< Supports pool sort capability @see struct rte_eth_rxseg_sort */
> 
> The same doubt here. As I know, the 'rte_eth_rxseg_capa' structure is used for buffer split.
> 
> Thanks,
> Xuan
> 
>>   	uint32_t multi_pools:1; /**< Supports receiving to multiple pools.*/
>>   	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
>>   	uint32_t offset_align_log2:4; /**< Required offset alignment. */
>>   	uint16_t max_nseg; /**< Maximum amount of segments to split. */
>> +	/* < Maximum amount of pools that PMD can sort based on packet/segment lengths */
>> +	uint16_t max_npool;
>>   	uint16_t reserved; /**< Reserved field. */  };
>>
>> --
>> 2.25.1
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-08-24 15:33     ` Ferruh Yigit
@ 2022-08-30 12:08       ` Hanumanth Reddy Pothula
  2022-09-06 12:18         ` Ferruh Yigit
  0 siblings, 1 reply; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-08-30 12:08 UTC (permalink / raw)
  To: Ferruh Yigit, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Wednesday, August 24, 2022 9:04 PM
> To: Ding, Xuan <xuan.ding@intel.com>; Hanumanth Reddy Pothula
> <hpothula@marvell.com>; Thomas Monjalon <thomas@monjalon.net>; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
> 
> External Email
> 
> ----------------------------------------------------------------------


Thanks Ding Xuan and Ferruh Yigit for reviewing the changes and for providing your valuable feedback.
Please find responses inline.

> On 8/23/2022 4:26 AM, Ding, Xuan wrote:
> > Hi Hanumanth,
> >
> >> -----Original Message-----
> >> From: Hanumanth Pothula <hpothula@marvell.com>
> >> Sent: Saturday, August 13, 2022 1:25 AM
> >> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> >> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> >> <andrew.rybchenko@oktetlabs.ru>
> >> Cc: dev@dpdk.org; Ding, Xuan <xuan.ding@intel.com>; Wu, WenxuanX
> >> <wenxuanx.wu@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>;
> >> stephen@networkplumber.org; Wang, YuanX <yuanx.wang@intel.com>;
> >> mdr@ashroe.eu; Zhang, Yuying <yuying.zhang@intel.com>; Zhang, Qi Z
> >> <qi.z.zhang@intel.com>; viacheslavo@nvidia.com; jerinj@marvell.com;
> >> ndabilpuram@marvell.com; Hanumanth Pothula <hpothula@marvell.com>
> >> Subject: [PATCH v2 1/3] ethdev: introduce pool sort capability
> >>
> >> Presently, the 'Buffer Split' feature supports sending multiple
> >> segments of the received packet to PMD, which programs the HW to
> >> receive the packet in segments from different pools.
> >>
> >> This patch extends the feature to support the pool sort capability.
> >> Some of the HW has support for choosing memory pools based on the
> >> packet's size. The pool sort capability allows PMD to choose a memory
> >> pool based on the packet's length.
> >>
> >> This is often useful for saving the memory where the application can
> >> create a different pool to steer the specific size of the packet,
> >> thus enabling effective use of memory.
> >>
> >> For example, let's say HW has a capability of three pools,
> >>   - pool-1 size is 2K
> >>   - pool-2 size is > 2K and < 4K
> >>   - pool-3 size is > 4K
> >> Here,
> >>          pool-1 can accommodate packets with sizes < 2K
> >>          pool-2 can accommodate packets with sizes > 2K and < 4K
> >>          pool-3 can accommodate packets with sizes > 4K
> >>
> >> With pool sort capability enabled in SW, an application may create
> >> three pools of different sizes and send them to PMD. Allowing PMD to
> >> program HW based on packet lengths. So that packets with less than 2K
> >> are received on pool-1, packets with lengths between 2K and 4K are
> >> received on pool-2 and finally packets greater than 4K are received on pool-
> 3.
> >>
> >> The following two capabilities are added to the rte_eth_rxseg_capa
> >> structure, 1. pool_sort --> tells pool sort capability is supported by HW.
> >> 2. max_npool --> max number of pools supported by HW.
> >>
> >> Defined new structure rte_eth_rxseg_sort, to be used only when pool
> >> sort capability is present. If required this may be extended further
> >> to support more configurations.
> >>
> >> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >>
> >> v2:
> >>   - Along with spec changes, uploading testpmd and driver changes.
> >
> > Thanks for CCing. It's an interesting feature.
> >
> > But I have one question here:
> > Buffer split is for splitting received packets into multiple segments,
> > while pool sort supports PMD putting the received packets into different pools
> > according to packet size.
> > Every packet is still intact.
> >
> > So, at this level, pool sort does not belong to buffer split.
> > And you already use a different function to check pool sort rather than check
> buffer split.
> >
> > Should a new RX offload be introduced? like
> "RTE_ETH_RX_OFFLOAD_POOL_SORT".
> >
Please find my response below. 
> 
> Hi Hanumanth,
> 
> I had a similar concern with the feature. I assume you want to benefit from
> the existing config structure that takes multiple mempools as an argument, since this
> feature also needs multiple mempools, but the feature is different.
> 
> It looks wrong to me to check the 'OFFLOAD_BUFFER_SPLIT' offload to decide whether to
> receive into multiple mempools or not, since that offload has nothing to do with splitting.
> Also, I am not sure about using the 'sort' keyword.
> What do you think about introducing a new feature, instead of extending the existing
> split one?

Actually, we thought both BUFFER_SPLIT and POOL_SORT are similar features, where RX
pools are configured in a certain way, and we preferred not to use up one more RX offload
capability, as the existing software architecture can be extended to support the pool_sort capability.
Yes, as part of pool sort there is no buffer split, but pools are picked based on the buffer length.

Since you think it's better to use a new RX offload for POOL_SORT, I will go ahead and implement the same.

> This is an optimisation, right? To enable us to use less memory for the packet
> buffer, does it qualify as a device offload?
> 
Yes, it qualifies as a device offload and saves memory.
The Marvell NIC has the capability to receive packets on two different pools based on their length.
More on this is explained below.
> 
> Also, what is the relation with segmented Rx? How does a PMD decide to use
> segmented Rx or a bigger mempool? How can the application configure this?
> 
> We need to clarify the rules. Based on your sample, if a 512-byte packet is received,
> does it have to go to pool-1, or can it go to any of the three pools?
> 
Here, the Marvell NIC supports two HW pools, the SPB (small packet buffer) pool and the LPB (large packet buffer) pool.
The SPB pool can hold packets up to 4KB.
The LPB pool can hold anything larger than 4KB.
Smaller packets are received on the SPB pool and larger packets on the LPB pool, based on the RQ configuration.
Here, in our case, the HW pools hold the whole packet. So if a packet is divided into segments, the lower-layer
HW receives all segments of the packet and then places the whole packet in the SPB/LPB pool,
based on the packet length.

As pools are picked based on the packet's length, we used the SORT term. In case you have any better term (word), please suggest.
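
For illustration, here is a minimal sketch of how an application might hand
two such pools to the PMD with the proposed v2 API. The pool names, sizes,
counts and queue parameters below are illustrative assumptions, not taken
from the patch (assumes the usual DPDK headers, e.g. rte_ethdev.h and rte_mbuf.h):

	/* Sketch only: one pool for small packets, one for large packets. */
	struct rte_mempool *spb_mp, *lpb_mp;
	struct rte_eth_rxseg rx_useg[2];
	struct rte_eth_rxconf rx_conf;
	uint16_t port_id = 0; /* assumed port */
	int ret;

	memset(rx_useg, 0, sizeof(rx_useg));
	memset(&rx_conf, 0, sizeof(rx_conf));

	spb_mp = rte_pktmbuf_pool_create("spb", 8192, 256, 0,
					 4096 + RTE_PKTMBUF_HEADROOM,
					 SOCKET_ID_ANY);
	lpb_mp = rte_pktmbuf_pool_create("lpb", 1024, 256, 0,
					 9216 + RTE_PKTMBUF_HEADROOM,
					 SOCKET_ID_ANY);

	rx_useg[0].sort.mp = spb_mp;   /* packets up to 4K should land here */
	rx_useg[0].sort.length = 4096;
	rx_useg[1].sort.mp = lpb_mp;   /* larger packets should land here */
	rx_useg[1].sort.length = 9216;

	rx_conf.rx_seg = rx_useg;
	rx_conf.rx_nseg = 2;
	rx_conf.mode_flag = RTE_ETH_RXSEG_MODE_SORT;
	/* The v2 patch still gates the extended config on the split offload. */
	rx_conf.offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;

	ret = rte_eth_rx_queue_setup(port_id, 0, 512,
				     rte_eth_dev_socket_id(port_id),
				     &rx_conf, NULL);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "Rx queue setup failed\n");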

> 
> And I don't see any change in the 'net/cnxk' Rx burst code, when
> multiple mempool used, while filling the mbufs shouldn't it check which
> mempool is filled. How this works without update in the Rx burst code,
> or am I missing some implementation detail?
> 
Please find the PMD changes in patch [v2,3/3] net/cnxk: introduce pool sort capability.
Here, in the control path, the HW pools are programmed based on the inputs received from the application.
Once the HW is programmed, packets are received on the HW pools based on the packet sizes.
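
Roughly, the control path in that patch classifies the two user-supplied pools
by their configured lengths. A simplified paraphrase of the driver change (not
the exact code; spb_pool, lpb_pool, rx_seg and rq come from the driver context):

	struct rte_mempool *spb_pool, *lpb_pool;

	/* The pool with the shorter configured length backs the SPB and the
	 * longer one backs the LPB; the real code also validates the mempool
	 * ops and pool IDs before programming the RQ.
	 */
	if (rx_seg[0].sort.length > rx_seg[1].sort.length) {
		lpb_pool = rx_seg[0].sort.mp;
		spb_pool = rx_seg[1].sort.mp;
	} else {
		lpb_pool = rx_seg[1].sort.mp;
		spb_pool = rx_seg[0].sort.mp;
	}

	rq->spb_ena = 1;                     /* enable the small-buffer pool */
	rq->spb_aura_handle = spb_pool->pool_id;
	rq->aura_handle = lpb_pool->pool_id; /* HW then picks a pool per packet */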
> 

I will upload v3 where POOL_SORT is implemented as a new RX offload, unless you have any other suggestions/thoughts.

> >> ---
> >>   lib/ethdev/rte_ethdev.c | 87 +++++++++++++++++++++++++++++++++++------
> >>   lib/ethdev/rte_ethdev.h | 45 +++++++++++++++++++--
> >>   2 files changed, 118 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> >> index 1979dc0850..7fd5443eb8 100644
> >> --- a/lib/ethdev/rte_ethdev.c
> >> +++ b/lib/ethdev/rte_ethdev.c
> >> @@ -1635,7 +1635,55 @@ rte_eth_dev_is_removed(uint16_t port_id)  }
> >>
> >>   static int
> >> -rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
> >> +rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg *rx_seg,
> >> +			     uint16_t n_seg, uint32_t *mbp_buf_size,
> >> +			     const struct rte_eth_dev_info *dev_info) {
> >> +	const struct rte_eth_rxseg_capa *seg_capa = &dev_info->rx_seg_capa;
> >> +	uint16_t seg_idx;
> >> +
> >> +	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
> >> +		RTE_ETHDEV_LOG(ERR,
> >> +			       "Invalid capabilities, multi_pools:%d different length segments %u exceed supported %u\n",
> >> +			       seg_capa->multi_pools, n_seg, seg_capa->max_nseg);
> >> +		return -EINVAL;
> >> +	}
> >> +
> >> +	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
> >> +		struct rte_mempool *mpl = rx_seg[seg_idx].sort.mp;
> >> +		uint32_t length = rx_seg[seg_idx].sort.length;
> >> +
> >> +		if (mpl == NULL) {
> >> +			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
> >> +			return -EINVAL;
> >> +		}
> >> +
> >> +		if (mpl->private_data_size <
> >> +			sizeof(struct rte_pktmbuf_pool_private)) {
> >> +			RTE_ETHDEV_LOG(ERR,
> >> +				       "%s private_data_size %u < %u\n",
> >> +				       mpl->name, mpl->private_data_size,
> >> +				       (unsigned int)sizeof
> >> +					(struct rte_pktmbuf_pool_private));
> >> +			return -ENOSPC;
> >> +		}
> >> +
> >> +		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> >> +		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
> >> +		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
> >> +			RTE_ETHDEV_LOG(ERR,
> >> +				       "%s mbuf_data_room_size %u < %u))\n",
> >> +				       mpl->name, *mbp_buf_size,
> >> +				       length);
> >> +			return -EINVAL;
> >> +		}
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int
> >> +rte_eth_rx_queue_check_split(const struct rte_eth_rxseg *rx_seg,
> >>   			     uint16_t n_seg, uint32_t *mbp_buf_size,
> >>   			     const struct rte_eth_dev_info *dev_info)  {
> >> @@ -1654,12 +1702,12 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
> >>   	 * Check the sizes and offsets against buffer sizes
> >>   	 * for each segment specified in extended configuration.
> >>   	 */
> >> -	mp_first = rx_seg[0].mp;
> >> +	mp_first = rx_seg[0].split.mp;
> >>   	offset_mask = RTE_BIT32(seg_capa->offset_align_log2) - 1;
> >>   	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
> >> -		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
> >> -		uint32_t length = rx_seg[seg_idx].length;
> >> -		uint32_t offset = rx_seg[seg_idx].offset;
> >> +		struct rte_mempool *mpl = rx_seg[seg_idx].split.mp;
> >> +		uint32_t length = rx_seg[seg_idx].split.length;
> >> +		uint32_t offset = rx_seg[seg_idx].split.offset;
> >>
> >>   		if (mpl == NULL) {
> >>   			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
> >> @@ -1693,7 +1741,11 @@ rte_eth_rx_queue_check_split(const struct
> >> rte_eth_rxseg_split *rx_seg,
> >>   		}
> >>   		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
> >>   		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> >> -		length = length != 0 ? length : *mbp_buf_size;
> >> +		/* On segment length == 0, update the segment's length with
> >> +		 * the pool's length - headroom space, to make sure enough
> >> +		 * space is accommodated for the header.
> >> +		 */
> >> +		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
> >>   		if (*mbp_buf_size < length + offset) {
> >>   			RTE_ETHDEV_LOG(ERR,
> >>   				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
> >> @@ -1764,7 +1816,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >>   			return -EINVAL;
> >>   		}
> >>   	} else {
> >> -		const struct rte_eth_rxseg_split *rx_seg;
> >>   		uint16_t n_seg;
> >>
> >>   		/* Extended multi-segment configuration check. */
> >> @@ -1774,13 +1825,27 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >>   			return -EINVAL;
> >>   		}
> >>
> >> -		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
> >>   		n_seg = rx_conf->rx_nseg;
> >>
> >>   		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> >> -			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
> >> -							   &mbp_buf_size,
> >> -							   &dev_info);
> >> +			ret = -1; /* To make sure at least one of the below conditions becomes true */
> >> +
> >> +			/* Check both NIX and application support buffer-split capability */
> >> +			if (dev_info.rx_seg_capa.mode_split &&
> >> +			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SPLIT) {
> >> +				ret = rte_eth_rx_queue_check_split(rx_conf->rx_seg, n_seg,
> >> +								   &mbp_buf_size,
> >> +								   &dev_info);
> >> +			}
> >> +
> >> +			/* Check both NIX and application support pool-sort capability */
> >> +			if (dev_info.rx_seg_capa.mode_sort &&
> >> +			    rx_conf->mode_flag == RTE_ETH_RXSEG_MODE_SORT) {
> >> +				ret = rte_eth_rx_queue_check_sort(rx_conf->rx_seg, n_seg,
> >> +								  &mbp_buf_size,
> >> +								  &dev_info);
> >> +			}
> >> +
> >>   			if (ret != 0)
> >>   				return ret;
> >>   		} else {
> >> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> >> index de9e970d4d..9f6787d7ad 100644
> >> --- a/lib/ethdev/rte_ethdev.h
> >> +++ b/lib/ethdev/rte_ethdev.h
> >> @@ -1204,16 +1204,46 @@ struct rte_eth_rxseg_split {
> >>   	uint32_t reserved; /**< Reserved field. */  };
> >>
> >> +/**
> >> + * The pool sort capability allows PMD to choose a memory pool based on the
> >> + * packet's length. So, basically, PMD programs HW for receiving packets from
> >> + * different pools, based on the packet's length.
> >> + *
> >> + * This is often useful for saving the memory where the application can create
> >> + * a different pool to steer the specific size of the packet, thus enabling
> >> + * effective use of memory.
> >> + */
> >> +struct rte_eth_rxseg_sort {
> >> +	struct rte_mempool *mp; /**< Memory pool to allocate packets from. */
> >> +	uint16_t length; /**< Packet data length. */
> >> +	uint32_t reserved; /**< Reserved field. */
> >> +};
> >> +
> >> +enum rte_eth_rxseg_mode {
> >> +	/**
> >> +	 * Buffer split mode: PMD splits the received packets into multiple segments.
> >> +	 * @see struct rte_eth_rxseg_split
> >> +	 */
> >> +	RTE_ETH_RXSEG_MODE_SPLIT = RTE_BIT64(0),
> >> +	/**
> >> +	 * Pool sort mode: PMD chooses a memory pool based on the packet's length.
> >> +	 * @see struct rte_eth_rxseg_sort
> >> +	 */
> >> +	RTE_ETH_RXSEG_MODE_SORT  = RTE_BIT64(1),
> >> +};
> >> +
> >>   /**
> >>    * @warning
> >>    * @b EXPERIMENTAL: this structure may change without prior notice.
> >>    *
> >>    * A common structure used to describe Rx packet segment properties.
> >>    */
> >> -union rte_eth_rxseg {
> >> +struct rte_eth_rxseg {
> >>   	/* The settings for buffer split offload. */
> >>   	struct rte_eth_rxseg_split split;
> >> -	/* The other features settings should be added here. */
> >> +
> >> +	/* The settings for packet sort offload. */
> >> +	struct rte_eth_rxseg_sort sort;
> >>   };
> >>
> >>   /**
> >> @@ -1239,6 +1269,11 @@ struct rte_eth_rxconf {
> >>   	 * fields on rte_eth_dev_info structure are allowed to be set.
> >>   	 */
> >>   	uint64_t offloads;
> >> +	/**
> >> +	 * PMD may support more than one rxseg mode. This allows the application
> >> +	 * to choose which mode to enable.
> >> +	 */
> >> +	enum rte_eth_rxseg_mode mode_flag;
> >>   	/**
> >>   	 * Points to the array of segment descriptions for an entire packet.
> >>   	 * Array elements are properties for consecutive Rx segments.
> >> @@ -1246,7 +1281,7 @@ struct rte_eth_rxconf {
> >>   	 * The supported capabilities of receiving segmentation is reported
> >>   	 * in rte_eth_dev_info.rx_seg_capa field.
> >>   	 */
> >> -	union rte_eth_rxseg *rx_seg;
> >> +	struct rte_eth_rxseg *rx_seg;
> >>
> >>   	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> >>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >> @@ -1827,10 +1862,14 @@ struct rte_eth_switch_info {
> >>    */
> >>   struct rte_eth_rxseg_capa {
> >>   	__extension__
> >> +	uint32_t mode_split : 1; /**< Supports buffer split capability @see struct rte_eth_rxseg_split */
> >> +	uint32_t mode_sort : 1; /**< Supports pool sort capability @see struct rte_eth_rxseg_sort */
> >
> > The same doubt here. As I know, the 'rte_eth_rxseg_capa' structure is used for
> > buffer split.
> >
> > Thanks,
> > Xuan
> >
> >>   	uint32_t multi_pools:1; /**< Supports receiving to multiple pools.*/
> >>   	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
> >>   	uint32_t offset_align_log2:4; /**< Required offset alignment. */
> >>   	uint16_t max_nseg; /**< Maximum amount of segments to split. */
> >> +	/* < Maximum amount of pools that PMD can sort based on packet/segment lengths */
> >> +	uint16_t max_npool;
> >>   	uint16_t reserved; /**< Reserved field. */  };
> >>
> >> --
> >> 2.25.1
> >


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v3 1/3] ethdev: introduce pool sort capability
  2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
                     ` (2 preceding siblings ...)
  2022-08-23  3:26   ` [PATCH v2 1/3] ethdev: " Ding, Xuan
@ 2022-09-02  7:00   ` Hanumanth Pothula
  2022-09-02  7:00     ` [PATCH v3 2/3] app/testpmd: Add support for " Hanumanth Pothula
                       ` (3 more replies)
  3 siblings, 4 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-02  7:00 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

This patch adds support for the pool sort capability.
Some of the HW has support for choosing memory pools based on the
packet's size. The pool sort capability allows PMD to choose a
memory pool based on the packet's length.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With pool sort capability enabled in SW, an application may create
three pools of different sizes and send them to PMD. Allowing PMD
to program HW based on packet lengths. So that packets with less
than 2K are received on pool-1, packets with lengths between 2K
and 4K are received on pool-2 and finally packets greater than 4K
are received on pool-3.

The following two capabilities are added to the rte_eth_rxseg_capa
structure,
1. pool_sort --> tells pool sort capability is supported by HW.
2. max_npool --> max number of pools supported by HW.

Defined new structure rte_eth_rxseg_sort, to be used only when pool
sort capability is present. If required this may be extended further
to support more configurations.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.
---
 lib/ethdev/rte_ethdev.c | 69 ++++++++++++++++++++++++++++++++++++++---
 lib/ethdev/rte_ethdev.h | 24 +++++++++++++-
 2 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..5152c08f1e 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1634,6 +1634,58 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_rx_queue_check_sort(const struct rte_eth_rxseg_sort *rx_seg,
+			     uint16_t n_seg, uint32_t *mbp_buf_size,
+			     const struct rte_eth_dev_info *dev_info)
+{
+	const struct rte_eth_rxseg_capa *seg_capa = &dev_info->rx_seg_capa;
+	uint16_t seg_idx;
+
+	if (!seg_capa->multi_pools || n_seg > seg_capa->max_npool) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid capabilities, multi_pools:%d different length segments %u exceed supported %u\n",
+			       seg_capa->multi_pools, n_seg, seg_capa->max_nseg);
+		return -EINVAL;
+	}
+
+	for (seg_idx = 0; seg_idx < n_seg; seg_idx++) {
+		struct rte_mempool *mpl = rx_seg[seg_idx].mp;
+		uint32_t length = rx_seg[seg_idx].length;
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
+			return -EINVAL;
+		}
+
+		if (mpl->private_data_size <
+			sizeof(struct rte_pktmbuf_pool_private)) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s private_data_size %u < %u\n",
+				       mpl->name, mpl->private_data_size,
+				       (unsigned int)sizeof
+					(struct rte_pktmbuf_pool_private));
+			return -ENOSPC;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		/* On segment length == 0, update the segment's length with
+		 * the pool's length - headroom space, to make sure enough
+		 * space is accommodated for the header.
+		 */
+		length = length != 0 ? length : (*mbp_buf_size - RTE_PKTMBUF_HEADROOM);
+		if (*mbp_buf_size < length + RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u))\n",
+				       mpl->name, *mbp_buf_size,
+				       length);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1764,7 +1816,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			return -EINVAL;
 		}
 	} else {
-		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
@@ -1774,13 +1825,23 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			return -EINVAL;
 		}
 
-		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
 		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			const struct rte_eth_rxseg_split *rx_seg =
+				(const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
-							   &mbp_buf_size,
-							   &dev_info);
+								   &mbp_buf_size,
+								   &dev_info);
+			if (ret != 0)
+				return ret;
+		} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SORT) {
+			const struct rte_eth_rxseg_sort *rx_seg =
+				(const struct rte_eth_rxseg_sort *)rx_conf->rx_seg;
+			ret = rte_eth_rx_queue_check_sort(rx_seg, n_seg,
+								  &mbp_buf_size,
+								  &dev_info);
+
 			if (ret != 0)
 				return ret;
 		} else {
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index de9e970d4d..f7b5901a40 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1204,6 +1204,21 @@ struct rte_eth_rxseg_split {
 	uint32_t reserved; /**< Reserved field. */
 };
 
+/**
+ * The pool sort capability allows PMD to choose a memory pool based on the
+ * packet's length. So, basically, PMD programs HW for receiving packets from
+ * different pools, based on the packet's length.
+ *
+ * This is often useful for saving the memory where the application can create
+ * a different pool to steer the specific size of the packet, thus enabling
+ * effective use of memory.
+ */
+struct rte_eth_rxseg_sort {
+	struct rte_mempool *mp; /**< Memory pool to allocate packets from. */
+	uint16_t length; /**< Packet data length. */
+	uint32_t reserved; /**< Reserved field. */
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this structure may change without prior notice.
@@ -1213,7 +1228,9 @@ struct rte_eth_rxseg_split {
 union rte_eth_rxseg {
 	/* The settings for buffer split offload. */
 	struct rte_eth_rxseg_split split;
-	/* The other features settings should be added here. */
+
+	/* The settings for packet sort offload. */
+	struct rte_eth_rxseg_sort sort;
 };
 
 /**
@@ -1633,6 +1650,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM  RTE_BIT64(18)
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_BUFFER_SORT      RTE_BIT64(21)
 
 #define DEV_RX_OFFLOAD_VLAN_STRIP       RTE_DEPRECATED(DEV_RX_OFFLOAD_VLAN_STRIP)       RTE_ETH_RX_OFFLOAD_VLAN_STRIP
 #define DEV_RX_OFFLOAD_IPV4_CKSUM       RTE_DEPRECATED(DEV_RX_OFFLOAD_IPV4_CKSUM)       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM
@@ -1827,10 +1845,14 @@ struct rte_eth_switch_info {
  */
 struct rte_eth_rxseg_capa {
 	__extension__
+	uint32_t mode_split : 1; /**< Supports buffer split capability @see struct rte_eth_rxseg_split */
+	uint32_t mode_sort : 1; /**< Supports pool sort capability @see struct rte_eth_rxseg_sort */
 	uint32_t multi_pools:1; /**< Supports receiving to multiple pools.*/
 	uint32_t offset_allowed:1; /**< Supports buffer offsets. */
 	uint32_t offset_align_log2:4; /**< Required offset alignment. */
 	uint16_t max_nseg; /**< Maximum amount of segments to split. */
+	/* < Maximum amount of pools that PMD can sort based on packet/segment lengths */
+	uint16_t max_npool;
 	uint16_t reserved; /**< Reserved field. */
 };
 
-- 
2.25.1
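
For context, with this version the multi-pool path is selected purely by the
new offload bit. Below is a minimal application-side sketch of the capability
check; queue and pool parameters are illustrative assumptions, with rx_useg[]
to be filled with .sort entries as shown earlier in the thread:

	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rx_conf;
	union rte_eth_rxseg rx_useg[2];
	struct rte_mempool *mp = NULL; /* must be a valid pool for the fallback */
	uint16_t port_id = 0;          /* assumed port */
	int ret;

	memset(&rx_conf, 0, sizeof(rx_conf));
	memset(rx_useg, 0, sizeof(rx_useg));

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "dev_info_get failed\n");

	if ((dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_BUFFER_SORT) &&
	    dev_info.rx_seg_capa.mode_sort &&
	    dev_info.rx_seg_capa.max_npool >= 2) {
		/* fill rx_useg[0..1].sort.mp and .sort.length here */
		rx_conf.offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SORT;
		rx_conf.rx_seg = rx_useg;
		rx_conf.rx_nseg = 2;
	} /* else: fall back to the single mempool passed via mp */

	ret = rte_eth_rx_queue_setup(port_id, 0, 512,
				     rte_eth_dev_socket_id(port_id),
				     &rx_conf, mp);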


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v3 2/3] app/testpmd: Add support for pool sort capability
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
@ 2022-09-02  7:00     ` Hanumanth Pothula
  2022-09-02  7:00     ` [PATCH v3 3/3] net/cnxk: introduce " Hanumanth Pothula
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-02  7:00 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram, Hanumanth Pothula

This patch adds support for the pool sort capability.
Some of the HW has support for choosing memory pools based on the
packet's size. The pool sort capability allows PMD to choose a
memory pool based on the packet's length.

Populate Rx Sort/Split attributes based on the Rx offload value.
Also, print the name of the pool on which a packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 31 ++++++++++++++++++++++---------
 app/test-pmd/util.c    |  4 ++--
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index addcbcac85..57f1d806b1 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2661,7 +2661,8 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	int ret;
 
 	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
+	     rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SORT) == 0) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2670,7 +2671,8 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		goto exit;
 	}
 	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+		struct rte_eth_rxseg_split *rx_split = &rx_useg[i].split;
+		struct rte_eth_rxseg_sort  *rx_sort = &rx_useg[i].sort;
 		struct rte_mempool *mpx;
 		/*
 		 * Use last valid pool for the segments with number
@@ -2678,13 +2680,24 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/**
+			 * On segment length zero, update the length as
+			 *      buffer size - headroom size
+			 * to make sure enough space is accommodated for the header.
+			 */
+			rx_split->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_split->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_split->mp = mpx ? mpx : mp;
+		} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SORT) {
+			rx_sort->length = rx_pkt_seg_lengths[i] ?
+					  rx_pkt_seg_lengths[i] :
+					  mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_sort->mp = mpx ? mpx : mp;
+		}
 	}
 	rx_conf->rx_nseg = rx_pkt_nb_segs;
 	rx_conf->rx_seg = rx_useg;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1
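
Given the format string above, the verbose output then carries the name of the
pool that actually supplied the mbuf; an illustrative (not captured) line could
look like:

  - pool=spb - type=0x0800 - length=128 - nb_segs=1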


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v3 3/3] net/cnxk: introduce pool sort capability
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
  2022-09-02  7:00     ` [PATCH v3 2/3] app/testpmd: Add support for " Hanumanth Pothula
@ 2022-09-02  7:00     ` Hanumanth Pothula
  2022-09-13  8:06     ` [PATCH v3 1/3] ethdev: " Andrew Rybchenko
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for mulitiple mbuf pools per Rx queue Hanumanth Pothula
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-02  7:00 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, Hanumanth Pothula

Presently, HW is programmed only to receive packets from the LPB pool,
making all packets be received from the LPB pool.

But, CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the pool sort capability; the pool is selected based
on the packet's length. So, basically, the PMD programs HW for receiving
packets from both SPB and LPB pools based on the packet's length.

This is achieved by enabling the Rx buffer sort offload,
RTE_ETH_RX_OFFLOAD_BUFFER_SORT. This allows the application to send
more than one pool (in our case two) to the driver, with different
segment (packet) lengths, which helps the driver configure both
pools based on segment lengths.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling effective use of memory.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 doc/guides/nics/features/cnxk.ini     |  1 +
 doc/guides/nics/features/cnxk_vec.ini |  1 +
 drivers/net/cnxk/cnxk_ethdev.c        | 93 ++++++++++++++++++++++++---
 drivers/net/cnxk/cnxk_ethdev.h        |  4 +-
 drivers/net/cnxk/cnxk_ethdev_ops.c    |  7 ++
 5 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini
index 1876fe86c7..e1584ed740 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+pool sort	     = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini b/doc/guides/nics/features/cnxk_vec.ini
index 5d0976e6ce..a63d35aae7 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+pool sort	     = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index cfcc4df916..376c5274d3 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -537,6 +537,64 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool, struct rte_mempool **spb_pool,
+			 uint16_t *lpb_len, uint16_t *spb_len)
+{
+	struct rte_eth_rxseg_sort rx_seg0;
+	struct rte_eth_rxseg_sort rx_seg1;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool || !rx_conf->rx_seg || rx_conf->rx_nseg != CNXK_NIX_NUM_POOLS_MAX ||
+	    !rx_conf->rx_seg[0].sort.mp || !rx_conf->rx_seg[1].sort.mp) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	rx_seg0 = rx_conf->rx_seg[0].sort;
+	rx_seg1 = rx_conf->rx_seg[1].sort;
+
+	if (rx_seg0.length >= rx_seg0.mp->elt_size || rx_seg1.length >= rx_seg1.mp->elt_size) {
+		plt_err("mismatch in packet length & pool length seg0_len:%u pool0_len:%u"\
+			"seg1_len:%u pool1_len:%u", rx_seg0.length, rx_seg0.mp->elt_size,
+			rx_seg1.length, rx_seg1.mp->elt_size);
+		return -EINVAL;
+	}
+
+	if (rx_seg0.length > rx_seg1.length) {
+		*lpb_pool = rx_seg0.mp;
+		*spb_pool = rx_seg1.mp;
+
+		*lpb_len = rx_seg0.length;
+		*spb_len = rx_seg1.length;
+	} else {
+		*lpb_pool = rx_seg1.mp;
+		*spb_pool = rx_seg0.mp;
+
+		*lpb_len = rx_seg1.length;
+		*spb_len = rx_seg0.length;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, *lpb_len, *spb_len);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -553,6 +611,10 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	uint16_t lpb_len = 0;
+	uint16_t spb_len = 0;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -560,15 +622,22 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SORT) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool,
+					      &lpb_len, &spb_len);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -585,13 +654,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -606,23 +675,29 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Setup ROC RQ */
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_len ? lpb_len : lpb_pool->elt_size;
 	rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
 	/* Enable Inline IPSec on RQ, will not be used for Poll mode */
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_len;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -645,7 +720,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -664,7 +739,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index f11a9a0b63..4b0c11b7d2 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
@@ -83,7 +85,7 @@
 	 RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_SCATTER |    \
 	 RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_RSS_HASH |    \
 	 RTE_ETH_RX_OFFLOAD_TIMESTAMP | RTE_ETH_RX_OFFLOAD_VLAN_STRIP |        \
-	 RTE_ETH_RX_OFFLOAD_SECURITY)
+	 RTE_ETH_RX_OFFLOAD_BUFFER_SORT | RTE_ETH_RX_OFFLOAD_SECURITY)
 
 #define RSS_IPV4_ENABLE                                                        \
 	(RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |                            \
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 1592971073..6174a586be 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,13 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->rx_seg_capa = (struct rte_eth_rxseg_capa){
+		.mode_sort = 1,
+		.multi_pools = 1,
+		.max_npool = CNXK_NIX_NUM_POOLS_MAX,
+	};
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-08-30 12:08       ` [EXT] " Hanumanth Reddy Pothula
@ 2022-09-06 12:18         ` Ferruh Yigit
  2022-09-07  7:02           ` Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Ferruh Yigit @ 2022-09-06 12:18 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

On 8/30/2022 1:08 PM, Hanumanth Reddy Pothula wrote:
> 
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
>> Sent: Wednesday, August 24, 2022 9:04 PM
>> To: Ding, Xuan <xuan.ding@intel.com>; Hanumanth Reddy Pothula
>> <hpothula@marvell.com>; Thomas Monjalon <thomas@monjalon.net>; Andrew
>> Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
>> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
>> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
>> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
>> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
>> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
>> Subject: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
>>
>> External Email
>>
>> ----------------------------------------------------------------------
> 
> 
> Thanks Ding Xuan and Ferruh Yigit for reviewing the changes and for providing your valuable feedback.
> Please find responses inline.
> 
>> On 8/23/2022 4:26 AM, Ding, Xuan wrote:
>>> Hi Hanumanth,
>>>
>>>> -----Original Message-----
>>>> From: Hanumanth Pothula <hpothula@marvell.com>
>>>> Sent: Saturday, August 13, 2022 1:25 AM
>>>> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
>>>> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
>>>> <andrew.rybchenko@oktetlabs.ru>
>>>> Cc: dev@dpdk.org; Ding, Xuan <xuan.ding@intel.com>; Wu, WenxuanX
>>>> <wenxuanx.wu@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>;
>>>> stephen@networkplumber.org; Wang, YuanX <yuanx.wang@intel.com>;
>>>> mdr@ashroe.eu; Zhang, Yuying <yuying.zhang@intel.com>; Zhang, Qi Z
>>>> <qi.z.zhang@intel.com>; viacheslavo@nvidia.com; jerinj@marvell.com;
>>>> ndabilpuram@marvell.com; Hanumanth Pothula <hpothula@marvell.com>
>>>> Subject: [PATCH v2 1/3] ethdev: introduce pool sort capability
>>>>
>>>> Presently, the 'Buffer Split' feature supports sending multiple
>>>> segments of the received packet to PMD, which programs the HW to
>>>> receive the packet in segments from different pools.
>>>>
>>>> This patch extends the feature to support the pool sort capability.
>>>> Some of the HW has support for choosing memory pools based on the
>>>> packet's size. The pool sort capability allows PMD to choose a memory
>>>> pool based on the packet's length.
>>>>
>>>> This is often useful for saving the memory where the application can
>>>> create a different pool to steer the specific size of the packet,
>>>> thus enabling effective use of memory.
>>>>
>>>> For example, let's say HW has a capability of three pools,
>>>>    - pool-1 size is 2K
>>>>    - pool-2 size is > 2K and < 4K
>>>>    - pool-3 size is > 4K
>>>> Here,
>>>>           pool-1 can accommodate packets with sizes < 2K
>>>>           pool-2 can accommodate packets with sizes > 2K and < 4K
>>>>           pool-3 can accommodate packets with sizes > 4K
>>>>
>>>> With pool sort capability enabled in SW, an application may create
>>>> three pools of different sizes and send them to PMD. Allowing PMD to
>>>> program HW based on packet lengths. So that packets with less than 2K
>>>> are received on pool-1, packets with lengths between 2K and 4K are
>>>> received on pool-2 and finally packets greater than 4K are received on pool-
>> 3.
>>>>
>>>> The following two capabilities are added to the rte_eth_rxseg_capa
>>>> structure, 1. pool_sort --> tells pool sort capability is supported by HW.
>>>> 2. max_npool --> max number of pools supported by HW.
>>>>
>>>> Defined new structure rte_eth_rxseg_sort, to be used only when pool
>>>> sort capability is present. If required this may be extended further
>>>> to support more configurations.
>>>>
>>>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>>>>
>>>> v2:
>>>>    - Along with spec changes, uploading testpmd and driver changes.
>>>
>>> Thanks for CCing. It's an interesting feature.
>>>
>>> But I have one question here:
>>> Buffer split is for splitting received packets into multiple segments,
>>> while pool sort supports PMD putting the received packets into different pools
>>> according to packet size.
>>> Every packet is still intact.
>>>
>>> So, at this level, pool sort does not belong to buffer split.
>>> And you already use a different function to check pool sort rather than check
>> buffer split.
>>>
>>> Should a new RX offload be introduced? like
>> "RTE_ETH_RX_OFFLOAD_POOL_SORT".
>>>
> Please find my response below.
>>
>> Hi Hanumanth,
>>
>> I had a similar concern with the feature. I assume you want to benefit from
>> the existing config structure that takes multiple mempools as an argument, since this
>> feature also needs multiple mempools, but the feature is different.
>>
>> It looks wrong to me to check the 'OFFLOAD_BUFFER_SPLIT' offload to decide whether to
>> receive into multiple mempools or not, since that offload has nothing to do with splitting.
>> Also, I am not sure about using the 'sort' keyword.
>> What do you think about introducing a new feature, instead of extending the existing
>> split one?
> 
> Actually, we thought both BUFFER_SPLIT and POOL_SORT are similar features, where RX
> pools are configured in a certain way, and we preferred not to use up one more RX offload
> capability, as the existing software architecture can be extended to support the pool_sort capability.
> Yes, as part of pool sort there is no buffer split, but pools are picked based on the buffer length.
> 
> Since you think it's better to use a new RX offload for POOL_SORT, I will go ahead and implement the same.
> 
>> This is an optimisation, right? To enable us to use less memory for the packet
>> buffer, does it qualify as a device offload?
>>
> Yes, it qualifies as a device offload and saves memory.
> The Marvell NIC has the capability to receive packets on two different pools based on their length.
> More on this is explained below.
>>
>> Also, what is the relation with segmented Rx? How does a PMD decide to use
>> segmented Rx or a bigger mempool? How can the application configure this?
>>
>> We need to clarify the rules. Based on your sample, if a 512-byte packet is received,
>> does it have to go to pool-1, or can it go to any of the three pools?
>>
> Here, the Marvell NIC supports two HW pools, the SPB (small packet buffer) pool and the LPB (large packet buffer) pool.
> The SPB pool can hold packets up to 4KB.
> The LPB pool can hold anything larger than 4KB.
> Smaller packets are received on the SPB pool and larger packets on the LPB pool, based on the RQ configuration.
> Here, in our case, the HW pools hold the whole packet. So if a packet is divided into segments, the lower-layer
> HW receives all segments of the packet and then places the whole packet in the SPB/LPB pool,
> based on the packet length.
> 

If the packet is bigger than 4KB, you have two options:
1- Use multiple chained buffers in SPB
2- Use a single LPB buffer

As I understand, (2) is used in this case, but I think we should clarify
how this feature works with the 'RTE_ETH_RX_OFFLOAD_SCATTER' offload, if it
is requested by the user.

Or let's say HW has two pools with 1K and 2K sizes, what is expected with
a 4K packet, with or without the scattered Rx offload?

> As pools are picked based on the packet's length, we used the SORT term. In case you have any better term (word), please suggest.
> 

What about multiple pool, like RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL? I think
it is clearer, but I would like to get more comments from others; naming
is hard ;)

>>
>> And I don't see any change in the 'net/cnxk' Rx burst code; when
>> multiple mempools are used, shouldn't it check, while filling the mbufs,
>> which mempool is filled? How does this work without an update in the Rx
>> burst code, or am I missing some implementation detail?
>>
> Please find the PMD changes in patch [v2,3/3] net/cnxk: introduce pool sort capability.
> Here, in the control path, HW pools are programmed based on the inputs received from the application.
> Once the HW is programmed, packets are received on the HW pools based on the packet sizes.
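For illustration, the control path described above could look roughly as
below. The 'rte_eth_rxseg_sort' structure, its 'mp'/'length' fields and the
'.sort' union member follow the v2 patch under discussion; the pool sizes,
queue parameters and the helper itself are a made-up example, not the
driver's actual code:

#include <string.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>

static int
setup_multi_pool_rxq(uint16_t port_id, struct rte_mempool *spb_pool,
		     struct rte_mempool *lpb_pool)
{
	union rte_eth_rxseg rx_seg[2];
	struct rte_eth_rxconf rxconf;

	memset(&rxconf, 0, sizeof(rxconf));
	memset(rx_seg, 0, sizeof(rx_seg));

	rx_seg[0].sort.mp = spb_pool;	/* buffers for packets up to 4KB */
	rx_seg[0].sort.length = 4096;
	rx_seg[1].sort.mp = lpb_pool;	/* buffers for larger packets */
	rx_seg[1].sort.length = 9216;

	rxconf.rx_seg = rx_seg;
	rxconf.rx_nseg = 2;

	/* Which offload flag requests this behaviour is exactly the open
	 * question in this thread, so none is set here. No single mb_pool
	 * argument: the pools are passed via rx_seg instead. */
	return rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
				      &rxconf, NULL);
}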

I was expecting changes in the datapath too, something like the Rx burst
function checking whether SPB or LPB is used and updating mbuf pointers
accordingly.
But it seems the HW doesn't work this way; can you please explain how this
feature works transparently to the datapath code?

>>
> 
> I will upload V3 where POOL_SORT is implemented as a new RX OFFLOAD, unless you have any other suggestions/thoughts.
> 

<...>


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-09-06 12:18         ` Ferruh Yigit
@ 2022-09-07  7:02           ` Hanumanth Reddy Pothula
  2022-09-07 11:24             ` Ferruh Yigit
  0 siblings, 1 reply; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-09-07  7:02 UTC (permalink / raw)
  To: Ferruh Yigit, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Tuesday, September 6, 2022 5:48 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Ding, Xuan
> <xuan.ding@intel.com>; Thomas Monjalon <thomas@monjalon.net>; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
> 
> On 8/30/2022 1:08 PM, Hanumanth Reddy Pothula wrote:
> <...>
> > Here, the Marvell NIC supports two HW pools, the SPB (small packet buffer) pool and
> > the LPB (large packet buffer) pool.
> > The SPB pool can hold packets up to 4KB.
> > The LPB pool can hold anything larger than 4KB. Smaller packets are received
> > on the SPB pool and larger packets on the LPB pool, based on the RQ configuration.
> > Here, in our case the HW pools hold the whole packet. So if a packet is
> > divided into segments, the lower-layer HW receives all segments of
> > the packet and then places the whole packet in the SPB/LPB pool, based
> > on the packet length.
> >
> 
> If the packet is bigger than 4KB, you have two options:
> 1- Use multiple chained buffers in SPB
> 2- Use a single LPB buffer
> 
> As I understand, (2) is used in this case, but I think we should clarify how this
> feature works with the 'RTE_ETH_RX_OFFLOAD_SCATTER' offload, if it is requested
> by the user.
> 
> Or let's say HW has two pools with 1K and 2K sizes, what is expected with a 4K
> packet, with or without the scattered Rx offload?
>

As mentioned, Marvell supports two pools, pool-1 (SPB) and pool-2 (LPB).
If the packet length is within pool-1's buffer length and the packet has only one segment, the packet is allocated from pool-1.
If the packet length is greater than pool-1's buffer length, or the packet has more than one segment, the packet is allocated from pool-2.

So, here packets with a single segment and length less than 1K are allocated from pool-1, and
packets with multiple segments, or packets with length greater than 1K, are allocated from pool-2.
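Expressed as a sketch (the helper name and the 1K threshold are from this
example only, not from the patch):

#include <stdint.h>

enum rx_pool { RX_POOL_1, RX_POOL_2 };

#define POOL1_BUF_LEN 1024	/* pool-1 (SPB) buffer size in this example */

static enum rx_pool
pick_pool(uint32_t pkt_len, uint16_t nb_segs)
{
	/* Single-segment packets that fit a pool-1 buffer use pool-1;
	 * longer or multi-segment packets fall back to pool-2. */
	if (nb_segs == 1 && pkt_len <= POOL1_BUF_LEN)
		return RX_POOL_1;
	return RX_POOL_2;
}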

> > As pools are picked based on the packet's length, we used the SORT term. In case
> > you have any better term (word), please suggest.
> >
> 
> What about multiple pool, like RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL? I think
> it is clearer, but I would like to get more comments from others; naming is
> hard ;)
> 
Yes, RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL is clearer than RTE_ETH_RX_OFFLOAD_SORT_POOL. 
Thanks for the suggestion. 
Will upload V4 with RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL.

> <...>


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-09-07  7:02           ` Hanumanth Reddy Pothula
@ 2022-09-07 11:24             ` Ferruh Yigit
  2022-09-07 21:31               ` Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Ferruh Yigit @ 2022-09-07 11:24 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

On 9/7/2022 8:02 AM, Hanumanth Reddy Pothula wrote:
> <...>
>> If the packet is bigger than 4KB, you have two options:
>> 1- Use multiple chained buffers in SPB
>> 2- Use a single LPB buffer
>>
>> As I understand, (2) is used in this case, but I think we should clarify how this
>> feature works with the 'RTE_ETH_RX_OFFLOAD_SCATTER' offload, if it is requested
>> by the user.
>>
>> Or let's say HW has two pools with 1K and 2K sizes, what is expected with a 4K
>> packet, with or without the scattered Rx offload?
>>
> 
> As mentioned, Marvell supports two pools, pool-1 (SPB) and pool-2 (LPB).
> If the packet length is within pool-1's buffer length and the packet has only one segment, the
> packet is allocated from pool-1.
> If the packet length is greater than pool-1's buffer length, or the packet has more than one
> segment, the packet is allocated from pool-2.
> 
> So, here packets with a single segment and length less than 1K are
> allocated from pool-1, and packets with multiple segments, or packets with
> length greater than 1K, are allocated from pool-2.
> 

Whether to have multiple segments or not is a HW configuration, not an
external variable. Drivers mostly decide whether to configure the HW to
receive multiple segments based on the buffer size and the max packet size
the device supports.
In this case, since the buffer size is not fixed and there are multiple
buffer sizes, how will the driver configure the HW?

This is not specific to Marvell HW; for the case where multiple mempools
are supported, it is better to clarify in this patch how it works with the
'RTE_ETH_RX_OFFLOAD_SCATTER' offload.
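One conservative answer, as a sketch only (all names here are hypothetical):
base the scatter decision on the smallest provided buffer, since without a
documented placement guarantee any packet may land in that pool:

#include <stdbool.h>
#include <stdint.h>

static bool
need_rx_scatter(uint32_t max_rx_pktlen, const uint32_t *buf_lens,
		uint16_t n_pools)
{
	uint32_t min_buf_len = UINT32_MAX;
	uint16_t i;

	for (i = 0; i < n_pools; i++)
		if (buf_lens[i] < min_buf_len)
			min_buf_len = buf_lens[i];

	/* If the largest expected packet cannot fit in the smallest
	 * buffer, the HW must be allowed to chain mbufs. */
	return max_rx_pktlen > min_buf_len;
}

A HW that guarantees size-based placement could instead compare against the
largest buffer; that difference is exactly why the rules need to be spelled
out.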

>> <...>
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-09-07 11:24             ` Ferruh Yigit
@ 2022-09-07 21:31               ` Hanumanth Reddy Pothula
  2022-09-13  9:28                 ` Ferruh Yigit
  0 siblings, 1 reply; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-09-07 21:31 UTC (permalink / raw)
  To: Ferruh Yigit, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Wednesday, September 7, 2022 4:54 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Ding, Xuan
> <xuan.ding@intel.com>; Thomas Monjalon <thomas@monjalon.net>; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
> 
> On 9/7/2022 8:02 AM, Hanumanth Reddy Pothula wrote:
> <...>
> >
> > As mentioned, Marvell supports two pools, pool-1 (SPB) and pool-2 (LPB).
> > If the packet length is within pool-1's buffer length and the packet has only one segment, the
> > packet is allocated from pool-1.
> > If the packet length is greater than pool-1's buffer length, or the packet has more than one
> > segment, the packet is allocated from pool-2.
> >
> > So, here packets with a single segment and length less than 1K are
> > allocated from pool-1, and packets with multiple segments, or packets with
> > length greater than 1K, are allocated from pool-2.
> >
> 
> Whether to have multiple segments or not is a HW configuration, not an external variable.
> Drivers mostly decide whether to configure the HW to receive multiple segments based
> on the buffer size and the max packet size the device supports.
> In this case, since the buffer size is not fixed and there are multiple buffer sizes, how
> will the driver configure the HW?
> 
> This is not specific to Marvell HW; for the case where multiple mempools are supported, it is
> better to clarify in this patch how it works with the
> 'RTE_ETH_RX_OFFLOAD_SCATTER' offload.
> 

Here, the application sends multiple pools with different buffer lengths to the PMD, and the PMD further programs the HW depending on its architecture.
Similarly, in this case, where multiple HW pools are present, with 'RTE_ETH_RX_OFFLOAD_SCATTER' enabled, the PMD receives packets/segments based on the HW architecture.
Depending on the architecture, if any extra programming is required (either to implement some logic or HW programming) in the RX path, it is to be implemented in that NIC's PMD.
As far as multiple pool support is considered, in the Marvell case, once the HW pools are programmed in the control path, there is nothing to be done on the fast path.

So, I think, it depends on the HW architecture how multiple pool and multiple segment receive is implemented.

Please suggest if I am missing any generic scenario (use case) here.
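As a sketch of why the fast path can stay pool-agnostic (the descriptor
layout below is hypothetical, not the cnxk one): the HW writes back the
address of whichever mbuf it filled, and since each mbuf already records
its owning mempool, the burst loop never needs to know which pool was
picked:

#include <stdint.h>
#include <rte_mbuf.h>

struct cq_entry {		/* hypothetical completion-queue entry */
	uint64_t mbuf_addr;	/* mbuf the HW filled, from either pool */
	uint32_t pkt_len;
};

static uint16_t
rx_burst_sketch(const struct cq_entry *cq, uint16_t n,
		struct rte_mbuf **pkts)
{
	uint16_t i;

	for (i = 0; i < n; i++) {
		struct rte_mbuf *m =
			(struct rte_mbuf *)(uintptr_t)cq[i].mbuf_addr;

		m->pkt_len = cq[i].pkt_len;
		m->data_len = (uint16_t)cq[i].pkt_len;
		/* m->pool was set at allocation time, so rte_pktmbuf_free()
		 * returns the buffer to the right mempool unchanged. */
		pkts[i] = m;
	}
	return n;
}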

> >> <...>
> >


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v3 1/3] ethdev: introduce pool sort capability
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
  2022-09-02  7:00     ` [PATCH v3 2/3] app/testpmd: Add support for " Hanumanth Pothula
  2022-09-02  7:00     ` [PATCH v3 3/3] net/cnxk: introduce " Hanumanth Pothula
@ 2022-09-13  8:06     ` Andrew Rybchenko
  2022-09-13  9:31       ` Ferruh Yigit
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Hanumanth Pothula
  3 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-09-13  8:06 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram

On 9/2/22 10:00, Hanumanth Pothula wrote:
> This patch adds support for the pool sort capability.

"Add support for serveral (?) mbuf pools per Rx queue."

I dislike the word "sort" in the summary and the feature
description. IMHO it is too restrictive for the intended behaviour.

The key feature here is just support for more than one mbuf
pool per Rx queue. That's it. Everything else should be out
of scope of the definition.

If buffers from many pools are provided, the hardware may do
whatever it wants with them. Use smaller buffers for small
packets and bigger ones for big packets. Use bigger buffers for
small packets if there are no small buffers available. Use a big
plus a small buffer if Rx scatter is enabled and a packet fits in
such a combination. And so on.

I.e. the feature should be orthogonal to Rx scatter.
Rx scatter just says whether the driver/application allows chaining
mbufs to receive a packet. If Rx scatter is disabled,
a packet must be delivered in a single mbuf (either big or
small). If Rx scatter is enabled, a packet may be delivered
using a chain of mbufs obtained from the provided pools (either
just one pool or many, if several pools are supported).

Ideally the feature should be orthogonal to buffer split as
well, i.e. provide many pools for different segments.
Maybe it is overkill to provide pools A and B for the first
segment and C and D for the second. It could be limited to the
last segment only. If so, we need a separate structure (not
rte_eth_rxseg) to pass many pools. IMHO, an array of mempools
is sufficient - similar to Rx queue configuration.
I.e. no extra length, since the data length may be derived from
the mempool element size.
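A rough sketch of that alternative (the two extra fields are hypothetical,
not part of any released rte_eth_rxconf at the time of this discussion):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

struct rxconf_multi_pool_sketch {
	struct rte_eth_rxconf conf;	  /* existing per-queue config */
	struct rte_mempool **rx_mempools; /* pools the HW may draw from */
	uint16_t rx_nmempool;		  /* number of entries above */
};

/* No per-pool length field: the usable data length is derived from the
 * mempool element size instead. */
static inline uint32_t
pool_data_len(struct rte_mempool *mp)
{
	return (uint32_t)rte_pktmbuf_data_room_size(mp) -
	       RTE_PKTMBUF_HEADROOM;
}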

> <...>
> 
> v3:
>   - Implemented Pool Sort capability as new Rx offload capability,
>     RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
> v2:
>   - Along with spec changes, uploading testpmd and driver changes.
> ---

[snip]


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-09-07 21:31               ` Hanumanth Reddy Pothula
@ 2022-09-13  9:28                 ` Ferruh Yigit
  2022-09-13 10:00                   ` Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Ferruh Yigit @ 2022-09-13  9:28 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

On 9/7/2022 10:31 PM, Hanumanth Reddy Pothula wrote:
> <...>
>> Whether to have multiple segments or not is a HW configuration, not an external variable.
>> Drivers mostly decide whether to configure the HW to receive multiple segments based
>> on the buffer size and the max packet size the device supports.
>> In this case, since the buffer size is not fixed and there are multiple buffer sizes, how
>> will the driver configure the HW?
>>
>> This is not specific to Marvell HW; for the case where multiple mempools are supported, it is
>> better to clarify in this patch how it works with the
>> 'RTE_ETH_RX_OFFLOAD_SCATTER' offload.
>>
> 
> Here, the application sends multiple pools with different buffer lengths to the PMD, and the PMD further programs the HW depending on its architecture.
> Similarly, in this case, where multiple HW pools are present, with 'RTE_ETH_RX_OFFLOAD_SCATTER' enabled, the PMD receives packets/segments based on the HW architecture.
> Depending on the architecture, if any extra programming is required (either to implement some logic or HW programming) in the RX path, it is to be implemented in that NIC's PMD.

My intention is to clarify the relation between the features. As Andrew 
suggested, having Rx segmentation on any pools, including a mixture of 
pools, is an option; only please document this in the API 
documentation so that it is clear for future PMD/app developers.
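A possible shape for that documentation, sketched with a placeholder flag
name (the final name and exact semantics were still being discussed):

/*
 * RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL (placeholder name)
 *
 * - The PMD may receive any packet into any of the configured mempools;
 *   no size-based placement is guaranteed unless the PMD documents one.
 * - If RTE_ETH_RX_OFFLOAD_SCATTER is also enabled, a packet may be
 *   delivered as a chain of mbufs drawn from any mix of the pools.
 * - If scatter is disabled, each packet must fit into a single buffer
 *   of whichever pool it is received into.
 */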

> As far as multiple pool support is considered, in the Marvell case, once the HW pools are programmed in the control path, there is nothing to be done on the fast path.
> 

Out of curiosity, how is this transparent in the fast path? Normally an mbuf 
will point to a data buffer; if there are multiple pools, how is this 
not affected?
Is there any redirection, like the mbuf buffer pointing to some kind of 
metadata (event etc.)?

> So, I think, it depends on the HW architecture how multiple pool and multiple segment receive is implemented.
> 
> Please suggest if I am missing any generic scenario (use case) here.
> 
>>>> <...>
>>>
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v3 1/3] ethdev: introduce pool sort capability
  2022-09-13  8:06     ` [PATCH v3 1/3] ethdev: " Andrew Rybchenko
@ 2022-09-13  9:31       ` Ferruh Yigit
  2022-09-13 10:41         ` [EXT] " Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Ferruh Yigit @ 2022-09-13  9:31 UTC (permalink / raw)
  To: Andrew Rybchenko, Hanumanth Pothula, Thomas Monjalon
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram

On 9/13/2022 9:06 AM, Andrew Rybchenko wrote:
> On 9/2/22 10:00, Hanumanth Pothula wrote:
>> This patch adds support for the pool sort capability.
> 
> "Add support for serveral (?) mbuf pools per Rx queue."
> 
> I dislike the word "sort" in the summary and the feature
> description. IMHO it is too restrictive for the intended behaviour.
> 
> The key feature here is just support for more than one mbuf
> pool per Rx queue. That's it. Everything else should be out
> of scope of the definition.
> 

ack, and the author already agreed to update it as 'MULTIPLE_POOL';
perhaps we should say 'MULTIPLE_MEMPOOL'?

> If buffers from many pools are provided, the hardware may do
> whatever it wants with them. Use smaller buffers for small
> packets and bigger for big. Use bigger buffers for small
> packets if there are no small buffers available. Use a big plus
> small buffer if Rx scatter is enabled and a packet fits in
> such a combination. And so on.
> 
> I.e. the feature should be orthogonal to Rx scatter.
> Rx scatter just says if the driver/application allows chaining
> mbufs to receive a packet. If Rx scatter is disabled,
> a packet must be delivered in a single mbuf (either big or
> small). If Rx scatter is enabled, a packet may be delivered
> using a chain of mbufs obtained from the provided pools (either
> just one or many if several pools are supported).
> 
> Ideally the feature should be orthogonal to buffer split as
> well. I.e. provide many pools for different segments.
> Maybe it is an overkill to provide pools A and B for the first
> segment and C and D for the second. It could be limited to the
> last segment only. If so, we need a separate structure (not
> rte_eth_rxseg) to pass many pools. IMHO, an array of mempools
> is sufficient - similar to Rx queue configuration.
> I.e. no extra length since data length may be derived from
> mempool element size.
> 
>> Some of the HW has support for choosing memory pools based on the
>> packet's size. The pool sort capability allows PMD to choose a
>> memory pool based on the packet's length.
>>
>> This is often useful for saving the memory where the application
>> can create a different pool to steer the specific size of the
>> packet, thus enabling effective use of memory.
>>
>> For example, let's say HW has a capability of three pools,
>>   - pool-1 size is 2K
>>   - pool-2 size is > 2K and < 4K
>>   - pool-3 size is > 4K
>> Here,
>>          pool-1 can accommodate packets with sizes < 2K
>>          pool-2 can accommodate packets with sizes > 2K and < 4K
>>          pool-3 can accommodate packets with sizes > 4K
>>
>> With pool sort capability enabled in SW, an application may create
>> three pools of different sizes and send them to PMD. Allowing PMD
>> to program HW based on packet lengths. So that packets with less
>> than 2K are received on pool-1, packets with lengths between 2K
>> and 4K are received on pool-2 and finally packets greater than 4K
>> are received on pool-3.
>>
>> The following two capabilities are added to the rte_eth_rxseg_capa
>> structure,
>> 1. pool_sort --> tells pool sort capability is supported by HW.
>> 2. max_npool --> max number of pools supported by HW.
>>
>> Defined new structure rte_eth_rxseg_sort, to be used only when pool
>> sort capability is present. If required this may be extended further
>> to support more configurations.
>>
>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>>
>> v3:
>>   - Implemented Pool Sort capability as new Rx offload capability,
>>     RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
>> v2:
>>   - Along with spec changes, uploading testpmd and driver changes.
>> ---
> 
> [snip]
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
  2022-09-13  9:28                 ` Ferruh Yigit
@ 2022-09-13 10:00                   ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-09-13 10:00 UTC (permalink / raw)
  To: Ferruh Yigit, Ding, Xuan, Thomas Monjalon, Andrew Rybchenko
  Cc: dev, Wu, WenxuanX, Li, Xiaoyun, stephen, Wang, YuanX, mdr, Zhang,
	Yuying, Zhang, Qi Z, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Tuesday, September 13, 2022 2:59 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Ding, Xuan
> <xuan.ding@intel.com>; Thomas Monjalon <thomas@monjalon.net>; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort capability
> 
> On 9/7/2022 10:31 PM, Hanumanth Reddy Pothula wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> >> Sent: Wednesday, September 7, 2022 4:54 PM
> >> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Ding, Xuan
> >> <xuan.ding@intel.com>; Thomas Monjalon <thomas@monjalon.net>;
> Andrew
> >> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> >> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> >> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> >> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> >> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran
> >> <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> >> <ndabilpuram@marvell.com>
> >> Subject: Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort
> >> capability
> >>
> >> On 9/7/2022 8:02 AM, Hanumanth Reddy Pothula wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> >>>> Sent: Tuesday, September 6, 2022 5:48 PM
> >>>> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Ding, Xuan
> >>>> <xuan.ding@intel.com>; Thomas Monjalon <thomas@monjalon.net>;
> >> Andrew
> >>>> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >>>> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li,
> Xiaoyun
> >>>> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> >>>> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> >>>> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> >>>> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran
> >>>> <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> >>>> <ndabilpuram@marvell.com>
> >>>> Subject: Re: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort
> >>>> capability
> >>>>
> >>>> On 8/30/2022 1:08 PM, Hanumanth Reddy Pothula wrote:
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> >>>>>> Sent: Wednesday, August 24, 2022 9:04 PM
> >>>>>> To: Ding, Xuan <xuan.ding@intel.com>; Hanumanth Reddy Pothula
> >>>>>> <hpothula@marvell.com>; Thomas Monjalon <thomas@monjalon.net>;
> >>>> Andrew
> >>>>>> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >>>>>> Cc: dev@dpdk.org; Wu, WenxuanX <wenxuanx.wu@intel.com>; Li,
> >> Xiaoyun
> >>>>>> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang, YuanX
> >>>>>> <yuanx.wang@intel.com>; mdr@ashroe.eu; Zhang, Yuying
> >>>>>> <yuying.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> >>>>>> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran
> >>>>>> <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> >>>>>> <ndabilpuram@marvell.com>
> >>>>>> Subject: [EXT] Re: [PATCH v2 1/3] ethdev: introduce pool sort
> >>>>>> capability
> >>>>>>
> >>>>>> External Email
> >>>>>>
> >>>>>> -----------------------------------------------------------------
> >>>>>> --
> >>>>>> --
> >>>>>> -
> >>>>>
> >>>>>
> >>>>> Thanks Ding Xuan and Ferruh Yigit for reviewing the changes and
> >>>>> for providing
> >>>> your valuable feedback.
> >>>>> Please find responses inline.
> >>>>>
> >>>>>> On 8/23/2022 4:26 AM, Ding, Xuan wrote:
> >>>>>>> Hi Hanumanth,
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Hanumanth Pothula <hpothula@marvell.com>
> >>>>>>>> Sent: Saturday, August 13, 2022 1:25 AM
> >>>>>>>> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> >>>>>>>> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> >>>>>>>> <andrew.rybchenko@oktetlabs.ru>
> >>>>>>>> Cc: dev@dpdk.org; Ding, Xuan <xuan.ding@intel.com>; Wu,
> >>>>>>>> WenxuanX <wenxuanx.wu@intel.com>; Li, Xiaoyun
> >>>>>>>> <xiaoyun.li@intel.com>; stephen@networkplumber.org; Wang,
> YuanX
> >> <yuanx.wang@intel.com>;
> >>>>>>>> mdr@ashroe.eu; Zhang, Yuying <yuying.zhang@intel.com>; Zhang,
> >>>>>>>> Qi Z <qi.z.zhang@intel.com>; viacheslavo@nvidia.com;
> >>>>>>>> jerinj@marvell.com; ndabilpuram@marvell.com; Hanumanth Pothula
> >>>>>>>> <hpothula@marvell.com>
> >>>>>>>> Subject: [PATCH v2 1/3] ethdev: introduce pool sort capability
> >>>>>>>>
> >>>>>>>> Presently, the 'Buffer Split' feature supports sending multiple
> >>>>>>>> segments of the received packet to PMD, which programs the HW
> >>>>>>>> to receive the packet in segments from different pools.
> >>>>>>>>
> >>>>>>>> This patch extends the feature to support the pool sort capability.
> >>>>>>>> Some of the HW has support for choosing memory pools based on
> >>>>>>>> the packet's size. The pool sort capability allows PMD to
> >>>>>>>> choose a memory pool based on the packet's length.
> >>>>>>>>
> >>>>>>>> This is often useful for saving the memory where the
> >>>>>>>> application can create a different pool to steer the specific
> >>>>>>>> size of the packet, thus enabling effective use of memory.
> >>>>>>>>
> >>>>>>>> For example, let's say HW has a capability of three pools,
> >>>>>>>>      - pool-1 size is 2K
> >>>>>>>>      - pool-2 size is > 2K and < 4K
> >>>>>>>>      - pool-3 size is > 4K
> >>>>>>>> Here,
> >>>>>>>>             pool-1 can accommodate packets with sizes < 2K
> >>>>>>>>             pool-2 can accommodate packets with sizes > 2K and < 4K
> >>>>>>>>             pool-3 can accommodate packets with sizes > 4K
> >>>>>>>>
> >>>>>>>> With pool sort capability enabled in SW, an application may
> >>>>>>>> create three pools of different sizes and send them to PMD.
> >>>>>>>> Allowing PMD to program HW based on packet lengths. So that
> >>>>>>>> packets with less than 2K are received on pool-1, packets with
> >>>>>>>> lengths between 2K and 4K are received on pool-2 and finally
> >>>>>>>> packets greater than 4K are received on pool-
> >>>>>> 3.
> >>>>>>>>
> >>>>>>>> The following two capabilities are added to the
> >>>>>>>> rte_eth_rxseg_capa structure, 1. pool_sort --> tells pool sort
> >>>>>>>> capability
> >> is supported by HW.
> >>>>>>>> 2. max_npool --> max number of pools supported by HW.
> >>>>>>>>
> >>>>>>>> Defined new structure rte_eth_rxseg_sort, to be used only when
> >>>>>>>> pool sort capability is present. If required this may be
> >>>>>>>> extended further to support more configurations.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >>>>>>>>
> >>>>>>>> v2:
> >>>>>>>>      - Along with spec changes, uploading testpmd and driver changes.
> >>>>>>>
> >>>>>>> Thanks for CCing. It's an interesting feature.
> >>>>>>>
> >>>>>>> But I have one question here:
> >>>>>>> Buffer split is for splitting received packets into multiple
> >>>>>>> segments, while pool sort supports PMD putting the received
> >>>>>>> packets into different pools
> >>>>>> according to packet size.
> >>>>>>> Every packet is still intact.
> >>>>>>>
> >>>>>>> So, at this level, pool sort does not belong to buffer split.
> >>>>>>> And you already use a different function to check pool sort
> >>>>>>> rather than check
> >>>>>> buffer split.
> >>>>>>>
> >>>>>>> Should a new RX offload be introduced? like
> >>>>>> "RTE_ETH_RX_OFFLOAD_POOL_SORT".
> >>>>>>>
> >>>>> Please find my response below.
> >>>>>>
> >>>>>> Hi Hanumanth,
> >>>>>>
> >>>>>> I had a similar concern with the feature. I assume you want to
> >>>>>> benefit from the existing config structure that gets multiple mempools
> >>>>>> as argument, since this feature also needs multiple mempools, but
> >>>>>> the feature is
> >>>> different.
> >>>>>>
> >>>>>> It looks wrong to me to check the 'OFFLOAD_BUFFER_SPLIT' offload to
> >>>>>> decide whether to receive into multiple mempools or not, which doesn't
> >>>>>> have
> >>>> anything related to split.
> >>>>>> Also not sure about using the 'sort' keyword.
> >>>>>> What do you think about introducing a new feature, instead of extending
> >>>>>> the existing split one?
> >>>>>
> >>>>> Actually we thought both BUFFER_SPLIT and POOL_SORT are similar
> >>>>> features where Rx pools are configured in a certain way, and thought
> >>>>> not to use up one more Rx offload capability, as the existing
> >>>>> software architecture
> >>>> can be extended to support the pool_sort capability.
> >>>>> Yes, as part of pool sort, there is no buffer split, but pools are
> >>>>> picked based on
> >>>> the buffer length.
> >>>>>
> >>>>> Since you think it's better to use new RX offload for POOL_SORT,
> >>>>> will go ahead
> >>>> and implement the same.
> >>>>>
> >>>>>> This is an optimisation, right? To enable us to use less memory for
> >>>>>> the packet buffer, does it qualify as a device offload?
> >>>>>>
> >>>>> Yes, it qualifies as a device offload and saves memory.
> >>>>> The Marvell NIC has a capability to receive packets on two different
> >>>>> pools based
> >>>> on the packet length.
> >>>>> More on the same is explained below.
> >>>>>>
> >>>>>> Also, what is the relation with segmented Rx; how does a PMD decide to
> >>>>>> use segmented Rx or a bigger mempool? How can an application
> >>>>>> configure
> >> this?
> >>>>>>
> >>>>>> Need to clarify the rules. Based on your sample, if a 512-byte
> >>>>>> packet is received, does it have to go to pool-1, or can it go to any
> >>>>>> of the three
> >> pools?
> >>>>>>
> >>>>> Here, the Marvell NIC supports two HW pools, the SPB (small packet buffer)
> >>>>> pool and
> >>>> the LPB (large packet buffer) pool.
> >>>>> The SPB pool can hold up to 4KB.
> >>>>> The LPB pool can hold anything more than 4KB. Smaller packets are
> >>>>> received on the SPB pool and larger packets on the LPB pool, based on the
> >>>>> RQ
> >> configuration.
> >>>>> Here, in our case, HW pools hold the whole packet. So if a packet is
> >>>>> divided into segments, the lower-layer HW is going to receive all
> >>>>> segments of the packet and then place the whole packet in the
> >>>>> SPB/LPB pool, based
> >>>> on the packet length.
> >>>>>
> >>>>
> >>>> If the packet is bigger than 4KB, you have two options:
> >>>> 1- Use multiple chained buffers in SPB
> >>>> 2- Use a single LPB buffer
> >>>>
> >>>> As I understand, (2) is used in this case, but I think we should
> >>>> clarify how this feature works with the 'RTE_ETH_RX_OFFLOAD_SCATTER'
> >>>> offload, if it is requested by the user.
> >>>>
> >>>> Or let's say HW has two pools with 1K and 2K sizes; what is expected
> >>>> with a 4K packet, with or without scattered Rx offload?
> >>>>
> >>>
> >>> As mentioned, Marvell supports two pools, pool-1 (SPB) and
> >>> pool-2 (LPB). If the packet length is within the pool-1 length and it has
> >>> only one segment, then the
> >> packet is allocated from pool-1.
> >>> If the packet length is greater than pool-1's or it has more than one
> >>> segment, then
> >> the packet is allocated from pool-2.
> >>>
> >>> So, here, packets with a single segment and length less than 1K are
> >>> allocated from pool-1, and packets with multiple segments or packets
> >>> with
> >> length greater than 1K are allocated from pool-2.
> >>>
> >>
> >> Whether to have multiple segments or not is HW configuration; it is not an external
> variable.
> >> Drivers mostly decide to configure HW to receive multiple segments or
> >> not based on the buffer size and the max packet size the device supports.
> >> In this case, since the buffer size is not fixed and there are multiple
> >> buffer sizes, how will the driver configure HW?
> >>
> >> This is not specific to Marvell HW; for the case where multiple mempools are
> >> supported, it is better to clarify in this patch how it works with the
> >> 'RTE_ETH_RX_OFFLOAD_SCATTER' offload.
> >>
> >
> > Here, the application sends multiple pools with different buffer lengths to PMD,
> and PMD further programs HW depending on its architecture.
> > Similarly, in this case, where multiple HW pools are present, with
> 'RTE_ETH_RX_OFFLOAD_SCATTER' enabled, PMD receives packets/segments
> based on the HW architecture.
> > Depending on the architecture, if any extra programming is required (either to
> implement some logic or HW programming) in the Rx path, it is to be implemented in
> that NIC PMD.
> 
> My intention is to clarify the relation between features. For example, Andrew
> suggested allowing Rx segmentation on any pools, including a mixture of pools;
> that is an option, only please document this in the API documentation so that it is
> clear for future PMD/app developers.

Sure, will document this in the API documentation.
 
> 
> > As far as multiple pool support is considered, in the Marvell case, once HW pools
> are programmed in the control path, there is nothing to be done on the fast path.
> >
> 
> Out of curiosity, how is this transparent in the fast path? Normally an mbuf will point
> to a data buffer; if there are multiple pools, how is this not affected?
> Is there any redirection, like the mbuf buffer pointing to some kind of metadata (event
> etc.)?
> 
> > So, I think, it depends on the HW architecture how multiple pool and multiple
> segment receive is implemented.
> >
> > Please suggest if I am missing any generic scenario (use case) here.
> >
> >>>>> As pools are picked based on the packet's length we used the SORT term.
> >>>>> In case
> >>>> you have any better term (word), please suggest.
> >>>>>
> >>>>
> >>>> what about multiple pool, like RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL? I
> >>>> think it is clearer, but I would like to get more comments from
> >>>> others; naming is hard ;)
> >>>>
> >>> Yes, RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL is clearer than
> >> RTE_ETH_RX_OFFLOAD_SORT_POOL.
> >>> Thanks for the suggestion.
> >>> Will upload V4 with RTE_ETH_RX_OFFLOAD_MULTIPLE_POOL.
> >>>
> >>>>>>
> >>>>>> And I don't see any change in the 'net/cnxk' Rx burst code when
> >>>>>> multiple mempools are used; while filling the mbufs, shouldn't it check
> >>>>>> which mempool is filled? How does this work without an update in the Rx
> >>>>>> burst code, or am I missing some implementation detail?
> >>>>>>
> >>>>> Please find PMD changes in patch [v2,3/3] net/cnxk: introduce pool
> >>>>> sort capability. Here, in the control path, HW pools are programmed
> >>>>> based on the
> >>>> inputs received from the application.
> >>>>> Once the HW is programmed, packets are received on HW pools based on
> >>>>> the
> >>>> packet sizes.
> >>>>
> >>>> I was expecting changes in the datapath too, something like the Rx
> >>>> burst function checking if spb or lpb is used and updating mbuf pointers
> >> accordingly.
> >>>> But it seems HW doesn't work this way; can you please explain how
> >>>> this feature works transparently to the datapath code?
> >>>>
> >>>>>>
> >>>>>
> >>>>> I will upload V3 where POOL_SORT is implemented as a new Rx offload,
> >>>>> unless
> >>>> you have any other suggestions/thoughts.
> >>>>>
> >>>>
> >>>> <...>
> >>>
> >


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v3 1/3] ethdev: introduce pool sort capability
  2022-09-13  9:31       ` Ferruh Yigit
@ 2022-09-13 10:41         ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-09-13 10:41 UTC (permalink / raw)
  To: Ferruh Yigit, Andrew Rybchenko, Thomas Monjalon
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo,
	Jerin Jacob Kollanukkaran, Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Tuesday, September 13, 2022 3:01 PM
> To: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Hanumanth Reddy
> Pothula <hpothula@marvell.com>; Thomas Monjalon <thomas@monjalon.net>
> Cc: dev@dpdk.org; xuan.ding@intel.com; wenxuanx.wu@intel.com;
> xiaoyun.li@intel.com; stephen@networkplumber.org; yuanx.wang@intel.com;
> mdr@ashroe.eu; yuying.zhang@intel.com; qi.z.zhang@intel.com;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: [EXT] Re: [PATCH v3 1/3] ethdev: introduce pool sort capability
> 
> External Email
> 
> ----------------------------------------------------------------------
> On 9/13/2022 9:06 AM, Andrew Rybchenko wrote:
> > On 9/2/22 10:00, Hanumanth Pothula wrote:
> >> This patch adds support for the pool sort capability.
> >
> > "Add support for serveral (?) mbuf pools per Rx queue."
> >
> > I dislike the word "sort" in summary and the feature description. IMHO
> > it is too restrictive for intended behaviour.
> >
> > The key feature here is just support for more than one mbuf pool per
> > Rx queue. That's it. Everything else should be out of scope of the
> > definition.
> >
> 
> ack, and the author already agreed to update it as 'MULTIPLE_POOL'; perhaps
> we should say 'MULTIPLE_MEMPOOL'?

Yes, will take care of the naming.

> 
> > If buffers from many pools are provided, the hardware may do whatever
> > it wants with them. Use smaller buffers for small packets and bigger for
> > big. Use bigger buffers for small packets if there are no small buffers
> > available. Use a big plus small buffer if Rx scatter is enabled and a
> > packet fits in such a combination. And so on.
> >
> > I.e. the feature should be orthogonal to Rx scatter.
> > Rx scatter just says if the driver/application allows chaining mbufs to
> > receive a packet. If Rx scatter is disabled, a packet must be
> > delivered in a single mbuf (either big or small). If Rx scatter is
> > enabled, a packet may be delivered using a chain of mbufs obtained from
> > the provided pools (either just one or many if several pools are
> > supported).
> >
> > Ideally the feature should be orthogonal to buffer split as well. I.e.
> > provide many pools for different segments.
> > Maybe it is an overkill to provide pools A and B for the first
> > segment and C and D for the second. It could be limited to the last
> > segment only. If so, we need a separate structure (not
> > rte_eth_rxseg) to pass many pools. IMHO, an array of mempools is
> > sufficient - similar to Rx queue configuration.
> > I.e. no extra length since data length may be derived from mempool
> > element size.

Thanks Andrew for your valuable inputs.
Yes, an array of mempools works fine.

Will upload V4 taking care of the above.
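
For illustration, a minimal sketch of the shape this suggestion leads to
(rx_mempools as declared in v5 below; the count field name is assumed
here, and small_pool/large_pool are assumed to exist):

	struct rte_mempool *pools[2];
	struct rte_eth_rxconf rxconf = { 0 };

	pools[0] = small_pool;        /* e.g. 2K data room */
	pools[1] = large_pool;        /* e.g. 8K data room */
	rxconf.rx_mempools = pools;   /* struct rte_mempool ** */
	rxconf.rx_nmempool = 2;       /* count field name assumed */
	/* no per-pool length: derived from each pool's element size */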

> >
> >> Some of the HW has support for choosing memory pools based on the
> >> packet's size. The pool sort capability allows PMD to choose a memory
> >> pool based on the packet's length.
> >>
> >> This is often useful for saving the memory where the application can
> >> create a different pool to steer the specific size of the packet,
> >> thus enabling effective use of memory.
> >>
> >> For example, let's say HW has a capability of three pools,
> >>   - pool-1 size is 2K
> >>   - pool-2 size is > 2K and < 4K
> >>   - pool-3 size is > 4K
> >> Here,
> >>          pool-1 can accommodate packets with sizes < 2K
> >>          pool-2 can accommodate packets with sizes > 2K and < 4K
> >>          pool-3 can accommodate packets with sizes > 4K
> >>
> >> With pool sort capability enabled in SW, an application may create
> >> three pools of different sizes and send them to PMD. Allowing PMD to
> >> program HW based on packet lengths. So that packets with less than 2K
> >> are received on pool-1, packets with lengths between 2K and 4K are
> >> received on pool-2 and finally packets greater than 4K are received
> >> on pool-3.
> >>
> >> The following two capabilities are added to the rte_eth_rxseg_capa
> >> structure, 1. pool_sort --> tells pool sort capability is supported
> >> by HW.
> >> 2. max_npool --> max number of pools supported by HW.
> >>
> >> Defined new structure rte_eth_rxseg_sort, to be used only when pool
> >> sort capability is present. If required this may be extended further
> >> to support more configurations.
> >>
> >> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >>
> >> v3:
> >>   - Implemented Pool Sort capability as new Rx offload capability,
> >>     RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
> >> v2:
> >>   - Along with spec changes, uploading testpmd and driver changes.
> >> ---
> >
> > [snip]
> >


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue
  2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
                       ` (2 preceding siblings ...)
  2022-09-13  8:06     ` [PATCH v3 1/3] ethdev: " Andrew Rybchenko
@ 2022-09-15  7:07     ` Hanumanth Pothula
  2022-09-15  7:07       ` [PATCH v4 2/3] app/testpmd: " Hanumanth Pothula
                         ` (3 more replies)
  3 siblings, 4 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-15  7:07 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

This patch adds support for multiple mempool capability.
Some of the HW has support for choosing memory pools based on the
packet's size. Thiscapability allows PMD to choose a memory pool
based on the packet's length.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With multiple mempool capability enabled in SW, an application may
create three pools of different sizes and send them to PMD. Allowing
PMD to program HW based on the packet lengths. So that packets with
less than 2K are received on pool-1, packets with lengths between 2K
and 4K are received on pool-2 and finally packets greater than 4K
are received on pool-3.
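
For illustration, a hedged sketch of the application-side setup using
the fields added in this patch (rx_mempool, rx_npool, max_pools and
RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL); the three pools are assumed to be
created by the caller:

	#include <errno.h>
	#include <string.h>
	#include <rte_ethdev.h>
	#include <rte_mempool.h>

	static int
	setup_multi_pool_rxq(uint16_t port, uint16_t qid, uint16_t nb_desc,
			     struct rte_mempool *pool_2k,
			     struct rte_mempool *pool_4k,
			     struct rte_mempool *pool_big)
	{
		static struct rte_eth_rx_mempool pools[3];
		struct rte_eth_dev_info info;
		struct rte_eth_rxconf rxconf;
		int ret;

		ret = rte_eth_dev_info_get(port, &info);
		if (ret != 0)
			return ret;
		/* max_pools is the capability added in this patch */
		if (info.max_pools < 3)
			return -ENOTSUP;

		pools[0].mp = pool_2k;   /* packets < 2K */
		pools[1].mp = pool_4k;   /* packets 2K..4K */
		pools[2].mp = pool_big;  /* packets > 4K */

		memset(&rxconf, 0, sizeof(rxconf));
		rxconf.offloads = RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL;
		rxconf.rx_mempool = pools;
		rxconf.rx_npool = 3;

		/* mp argument is NULL: buffers come from the array */
		return rte_eth_rx_queue_setup(port, qid, nb_desc,
					      rte_eth_dev_socket_id(port),
					      &rxconf, NULL);
	}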

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v4:
 - Renamed the offload capability from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
   to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
 - In struct rte_eth_rxconf, defined a new pointer, which holds an array
   of type struct rte_eth_rx_mempool (memory pools). This array is used
   by PMD to program multiple mempools.
v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.
---
 lib/ethdev/rte_ethdev.c | 78 ++++++++++++++++++++++++++++++++++-------
 lib/ethdev/rte_ethdev.h | 24 +++++++++++++
 2 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..8618d6b01d 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1634,6 +1634,45 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_rx_queue_check_mempool(const struct rte_eth_rx_mempool *rx_mempool,
+			       uint16_t n_pool, uint32_t *mbp_buf_size,
+			       const struct rte_eth_dev_info *dev_info)
+{
+	uint16_t pool_idx;
+
+	if (n_pool > dev_info->max_pools) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid capabilities, max pools supported %u\n",
+			       dev_info->max_pools);
+		return -EINVAL;
+	}
+
+	for (pool_idx = 0; pool_idx < n_pool; pool_idx++) {
+		struct rte_mempool *mpl = rx_mempool[pool_idx].mp;
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
+			return -EINVAL;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		if (*mbp_buf_size < dev_info->min_rx_bufsize +
+		    RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
+					mpl->name, *mbp_buf_size,
+					RTE_PKTMBUF_HEADROOM + dev_info->min_rx_bufsize,
+					RTE_PKTMBUF_HEADROOM,
+					dev_info->min_rx_bufsize);
+			return -EINVAL;
+		}
+
+	}
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1733,7 +1772,8 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 
 	if (mp != NULL) {
 		/* Single pool configuration check. */
-		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
+		if (rx_conf != NULL &&
+		    (rx_conf->rx_nseg != 0 ||  rx_conf->rx_npool)) {
 			RTE_ETHDEV_LOG(ERR,
 				       "Ambiguous segment configuration\n");
 			return -EINVAL;
@@ -1763,30 +1803,42 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 				       dev_info.min_rx_bufsize);
 			return -EINVAL;
 		}
-	} else {
-		const struct rte_eth_rxseg_split *rx_seg;
-		uint16_t n_seg;
+	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
+		  rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
 
-		/* Extended multi-segment configuration check. */
-		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
+		/* Extended multi-segment/pool configuration check. */
+		if (rx_conf == NULL ||
+		    (rx_conf->rx_seg == NULL && rx_conf->rx_mempool == NULL) ||
+		    (rx_conf->rx_nseg == 0 && rx_conf->rx_npool == 0)) {
 			RTE_ETHDEV_LOG(ERR,
 				       "Memory pool is null and no extended configuration provided\n");
 			return -EINVAL;
 		}
 
-		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
-		n_seg = rx_conf->rx_nseg;
-
 		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			const struct rte_eth_rxseg_split *rx_seg =
+				(const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
+			uint16_t n_seg = rx_conf->rx_nseg;
 			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
-			if (ret != 0)
+			if (ret)
 				return ret;
-		} else {
-			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
-			return -EINVAL;
 		}
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+			const struct rte_eth_rx_mempool *rx_mempool =
+				(const struct rte_eth_rx_mempool *)rx_conf->rx_mempool;
+			ret = rte_eth_rx_queue_check_mempool(rx_mempool,
+							     rx_conf->rx_npool,
+							     &mbp_buf_size,
+							     &dev_info);
+			if (ret)
+				return ret;
+
+		}
+	} else {
+		RTE_ETHDEV_LOG(ERR, "No Rx offload is configured\n");
+		return -EINVAL;
 	}
 
 	/* Use default specified by driver, if nb_rx_desc is zero */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index b62ac5bb6f..17deec2cbd 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1035,6 +1035,11 @@ union rte_eth_rxseg {
 	/* The other features settings should be added here. */
 };
 
+/* A common structure used to describe mbuf pools per Rx queue */
+struct rte_eth_rx_mempool {
+	struct rte_mempool *mp;
+};
+
 /**
  * A structure used to configure an Rx ring of an Ethernet port.
  */
@@ -1067,6 +1072,23 @@ struct rte_eth_rxconf {
 	 */
 	union rte_eth_rxseg *rx_seg;
 
+	/**
+	 * Points to an array of mempools.
+	 *
+	 * This provides support for multiple mbuf pools per Rx queue.
+	 *
+	 * This is often useful for saving the memory where the application can
+	 * create different pools to steer the specific size of the packet, thus
+	 * enabling effective use of memory.
+	 *
+	 * Note that on Rx scatter enable, a packet may be delivered using a chain
+	 * of mbufs obtained from single mempool or multiple mempools based on
+	 * the NIC implementation.
+	 *
+	 */
+	struct rte_eth_rx_mempool *rx_mempool;
+	uint16_t rx_npool; /**< Number of mempools */
+
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
@@ -1395,6 +1417,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM  RTE_BIT64(18)
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL      RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1615,6 +1638,7 @@ struct rte_eth_dev_info {
 	/** Configured number of Rx/Tx queues */
 	uint16_t nb_rx_queues; /**< Number of Rx queues. */
 	uint16_t nb_tx_queues; /**< Number of Tx queues. */
+	uint16_t max_pools;
 	/** Rx parameter recommendations */
 	struct rte_eth_dev_portconf default_rxportconf;
 	/** Tx parameter recommendations */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v4 2/3] app/testpmd: Add support for multiple mbuf pools per Rx queue
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Hanumanth Pothula
@ 2022-09-15  7:07       ` Hanumanth Pothula
  2022-09-15  7:07       ` [PATCH v4 3/3] net/cnxk: Add support for multiple mbuf pools Hanumanth Pothula
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-15  7:07 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram, Hanumanth Pothula

This patch adds support for multiple mempools.
Some of the HW has support for choosing memory pools based on the
packet's size. The multiple mempool capability allows PMD to choose a
memory pool based on the packet's length.

With multiple mempool support enabled, populate the mempool array and
also print the name of the pool on which a packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 41 +++++++++++++++++++++++++++++------------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 77741fc41f..d16a552e6d 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2624,11 +2624,13 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_eth_rx_mempool rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
 	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
+	     rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) == 0) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2637,7 +2639,8 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		goto exit;
 	}
 	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+		struct rte_eth_rxseg_split *rx_split = &rx_useg[i].split;
+		struct rte_eth_rx_mempool *mempool = &rx_mempool[i];
 		struct rte_mempool *mpx;
 		/*
 		 * Use last valid pool for the segments with number
@@ -2645,16 +2648,30 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/**
+			 * On segment length zero, update the length as
+			 *      buffer size - headroom size
+			 * to make sure enough space is accommodated for the header.
+			 */
+			rx_split->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_split->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_split->mp = mpx ? mpx : mp;
+		}
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL)
+			mempool->mp = mpx ? mpx : mp;
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rx_conf->rx_mempool = rx_mempool;
+		rx_conf->rx_npool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index ddf5e21849..15a26171e2 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -82,6 +82,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v4 3/3] net/cnxk: Add support for multiple mbuf pools
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Hanumanth Pothula
  2022-09-15  7:07       ` [PATCH v4 2/3] app/testpmd: " Hanumanth Pothula
@ 2022-09-15  7:07       ` Hanumanth Pothula
  2022-09-28  9:43       ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Andrew Rybchenko
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
  3 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-09-15  7:07 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, Hanumanth Pothula

Presently, HW is programmed only to receive packets from the LPB pool,
making all packets be received from the LPB pool.

But CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the multiple mempool capability, where a pool is
selected based on the packet's length. So, basically, PMD programs HW
for receiving packets from both SPB and LPB pools based on the
packet's length.

This is achieved by enabling the Rx multiple mempool offload,
RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL. This allows the application to send
more than one pool (in our case two) to the driver, with different
segment (packet) lengths, which helps the driver to configure both
pools based on segment lengths.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling effective use of memory.
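
For illustration, a hedged sketch of the two pools an application might
create for this (counts and sizes illustrative; the driver classifies
them as SPB/LPB by element size, see cnxk_nix_process_rx_conf() below):

	#include <errno.h>
	#include <rte_mbuf.h>

	static int
	create_spb_lpb_pools(struct rte_mempool **spb, struct rte_mempool **lpb)
	{
		/* name, n, cache, priv size, data room, socket */
		*spb = rte_pktmbuf_pool_create("spb", 8192, 256, 0,
					       2048 + RTE_PKTMBUF_HEADROOM,
					       SOCKET_ID_ANY);
		*lpb = rte_pktmbuf_pool_create("lpb", 1024, 256, 0,
					       8192 + RTE_PKTMBUF_HEADROOM,
					       SOCKET_ID_ANY);
		return (*spb != NULL && *lpb != NULL) ? 0 : -ENOMEM;
	}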

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 doc/guides/nics/features/cnxk.ini     |  1 +
 doc/guides/nics/features/cnxk_vec.ini |  1 +
 drivers/net/cnxk/cnxk_ethdev.c        | 77 +++++++++++++++++++++++----
 drivers/net/cnxk/cnxk_ethdev.h        |  4 +-
 drivers/net/cnxk/cnxk_ethdev_ops.c    |  3 ++
 5 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini
index 1876fe86c7..ed778ba398 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini b/doc/guides/nics/features/cnxk_vec.ini
index 5d0976e6ce..c2270fe338 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index a089cc463b..5c962d6388 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -537,6 +537,51 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool, struct rte_mempool **spb_pool)
+{
+	struct rte_mempool *pool0;
+	struct rte_mempool *pool1;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool || !rx_conf->rx_mempool ||
+	    rx_conf->rx_npool != CNXK_NIX_NUM_POOLS_MAX) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	pool0 = rx_conf->rx_mempool[0].mp;
+	pool1 = rx_conf->rx_mempool[1].mp;
+
+	if (pool0->elt_size > pool1->elt_size) {
+		*lpb_pool = pool0;
+		*spb_pool = pool1;
+
+	} else {
+		*lpb_pool = pool1;
+		*spb_pool = pool0;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, (*lpb_pool)->elt_size, (*spb_pool)->elt_size);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -553,6 +598,8 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -560,15 +607,21 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -585,13 +638,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -606,23 +659,29 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Setup ROC RQ */
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_pool->elt_size;
 	rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
 	/* Enable Inline IPSec on RQ, will not be used for Poll mode */
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_pool->elt_size;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -645,7 +704,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -664,7 +723,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index f11a9a0b63..194619e7b3 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
@@ -83,7 +85,7 @@
 	 RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_SCATTER |    \
 	 RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_RSS_HASH |    \
 	 RTE_ETH_RX_OFFLOAD_TIMESTAMP | RTE_ETH_RX_OFFLOAD_VLAN_STRIP |        \
-	 RTE_ETH_RX_OFFLOAD_SECURITY)
+	 RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL | RTE_ETH_RX_OFFLOAD_SECURITY)
 
 #define RSS_IPV4_ENABLE                                                        \
 	(RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |                            \
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 1592971073..49ca7a90f1 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,9 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->max_pools = CNXK_NIX_NUM_POOLS_MAX;
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Hanumanth Pothula
  2022-09-15  7:07       ` [PATCH v4 2/3] app/testpmd: " Hanumanth Pothula
  2022-09-15  7:07       ` [PATCH v4 3/3] net/cnxk: Add support for multiple mbuf pools Hanumanth Pothula
@ 2022-09-28  9:43       ` Andrew Rybchenko
  2022-09-28 11:06         ` Thomas Monjalon
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
  3 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-09-28  9:43 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Morten Brørup

"Add support for" -> "add support for" or just "support" if
line is long

On 9/15/22 10:07, Hanumanth Pothula wrote:
> This patch adds support for multiple mempool capability.

"This patch adds" -> "Add"

> Some of the HW has support for choosing memory pools based on the
> packet's size. Thiscapability allows PMD to choose a memory pool

Thiscapability -> The capability

> based on the packet's length.
> 
> This is often useful for saving the memory where the application
> can create a different pool to steer the specific size of the
> packet, thus enabling effective use of memory.
> 
> For example, let's say HW has a capability of three pools,
>   - pool-1 size is 2K
>   - pool-2 size is > 2K and < 4K
>   - pool-3 size is > 4K
> Here,
>          pool-1 can accommodate packets with sizes < 2K
>          pool-2 can accommodate packets with sizes > 2K and < 4K
>          pool-3 can accommodate packets with sizes > 4K
> 
> With multiple mempool capability enabled in SW, an application may
> create three pools of different sizes and send them to PMD. Allowing
> PMD to program HW based on the packet lengths. So that packets with
> less than 2K are received on pool-1, packets with lengths between 2K
> and 4K are received on pool-2 and finally packets greater than 4K
> are received on pool-3.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

Please, advertise the new feature in release notes.

[snip]

> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 1979dc0850..8618d6b01d 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1634,6 +1634,45 @@ rte_eth_dev_is_removed(uint16_t port_id)
>   	return ret;
>   }
>   
> +static int
> +rte_eth_rx_queue_check_mempool(const struct rte_eth_rx_mempool *rx_mempool,
> +			       uint16_t n_pool, uint32_t *mbp_buf_size,
> +			       const struct rte_eth_dev_info *dev_info)
> +{
> +	uint16_t pool_idx;
> +
> +	if (n_pool > dev_info->max_pools) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid capabilities, max pools supported %u\n",

"Invalid capabilities" sounds misleading. Consider something
like:

"Too many Rx mempools %u vs maximum %u\n", n_pool, dev_info->max_pools

> +			       dev_info->max_pools);
> +		return -EINVAL;
> +	}
> +
> +	for (pool_idx = 0; pool_idx < n_pool; pool_idx++) {
> +		struct rte_mempool *mpl = rx_mempool[pool_idx].mp;
> +
> +		if (mpl == NULL) {
> +			RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");

"null Rx mempool pointer\n"

> +			return -EINVAL;
> +		}
> +
> +		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> +		if (*mbp_buf_size < dev_info->min_rx_bufsize +
> +		    RTE_PKTMBUF_HEADROOM) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
> +					mpl->name, *mbp_buf_size,
> +					RTE_PKTMBUF_HEADROOM + dev_info->min_rx_bufsize,
> +					RTE_PKTMBUF_HEADROOM,
> +					dev_info->min_rx_bufsize);
> +			return -EINVAL;
> +		}
> +

Please, remove extra empty line

> +	}

If Rx scatter is disabled, at least one mempool must be
sufficient for up to MTU packets.
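
For instance, a hedged sketch of that check (assuming a max_rx_buf_size
accumulated over the pool loop above and the queue's offloads passed in;
both are hypothetical here):

	if ((offloads & RTE_ETH_RX_OFFLOAD_SCATTER) == 0 &&
	    max_rx_buf_size < dev_info->max_rx_pktlen + RTE_PKTMBUF_HEADROOM) {
		RTE_ETHDEV_LOG(ERR,
			       "Rx scatter is disabled and no mempool can hold max packet length %u\n",
			       dev_info->max_rx_pktlen);
		return -EINVAL;
	}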

> +
> +	return 0;
> +}
> +
>   static int
>   rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>   			     uint16_t n_seg, uint32_t *mbp_buf_size,
> @@ -1733,7 +1772,8 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   
>   	if (mp != NULL) {
>   		/* Single pool configuration check. */
> -		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
> +		if (rx_conf != NULL &&
> +		    (rx_conf->rx_nseg != 0 ||  rx_conf->rx_npool)) {

rx_conf->rx_npool != 0 (as DPDK coding style says)

If mp is not NULL, it should be checked that neither buffer
split nor multiple mempool offloads are enabled.
Moreover, I think there is a bug in buffer split which
requires a separate pre-patch. The check for rx_nseg being 0 is
in fact not required, since the offload flag must be used.

>   			RTE_ETHDEV_LOG(ERR,
>   				       "Ambiguous segment configuration\n");

segment -> Rx mempools

>   			return -EINVAL;
> @@ -1763,30 +1803,42 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,

>   				       dev_info.min_rx_bufsize);
>   			return -EINVAL;
>   		}
> -	} else {
> -		const struct rte_eth_rxseg_split *rx_seg;
> -		uint16_t n_seg;
> +	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
> +		  rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {

Maybe:
((rx_conf->offloads & (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT |
                       RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL)) != 0)
However, I'd split these branches to have clearer checks.
If we do not support both buffer split and multi-mempool
simultaneously - it must be checked. Just double check
that another offload is not requested.
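
A hedged sketch of such an exclusivity check, if the two offloads are
indeed mutually exclusive (names from this patch):

	if ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0 &&
	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) != 0) {
		RTE_ETHDEV_LOG(ERR,
			       "Buffer split and multiple Rx mempools cannot be requested together\n");
		return -EINVAL;
	}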

>   
> -		/* Extended multi-segment configuration check. */
> -		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
> +		/* Extended multi-segment/pool configuration check. */
> +		if (rx_conf == NULL ||
> +		    (rx_conf->rx_seg == NULL && rx_conf->rx_mempool == NULL) ||
> +		    (rx_conf->rx_nseg == 0 && rx_conf->rx_npool == 0)) {

IMHO such generalized checks are wrong. We must check for the
corresponding offload flag first.

>   			RTE_ETHDEV_LOG(ERR,
>   				       "Memory pool is null and no extended configuration provided\n");
>   			return -EINVAL;
>   		}
>   
> -		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
> -		n_seg = rx_conf->rx_nseg;
> -
>   		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> +			const struct rte_eth_rxseg_split *rx_seg =
> +				(const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
> +			uint16_t n_seg = rx_conf->rx_nseg;
>   			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
>   							   &mbp_buf_size,
>   							   &dev_info);
> -			if (ret != 0)
> +			if (ret)

Integers must be checked vs 0 explicitly in DPDK coding style.
Also the change looks unrelated.

>   				return ret;
> -		} else {
> -			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
> -			return -EINVAL;
>   		}
> +		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
> +			const struct rte_eth_rx_mempool *rx_mempool =
> +				(const struct rte_eth_rx_mempool *)rx_conf->rx_mempool;
> +			ret = rte_eth_rx_queue_check_mempool(rx_mempool,
> +							     rx_conf->rx_npool,
> +							     &mbp_buf_size,
> +							     &dev_info);
> +			if (ret)
> +				return ret;
> +
> +		}
> +	} else {
> +		RTE_ETHDEV_LOG(ERR, "No Rx offload is configured\n");

The log message is misleading. Consider:
"Missing Rx mempool configuration\n"

> +		return -EINVAL;
>   	}
>   
>   	/* Use default specified by driver, if nb_rx_desc is zero */
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index b62ac5bb6f..17deec2cbd 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1035,6 +1035,11 @@ union rte_eth_rxseg {
>   	/* The other features settings should be added here. */
>   };
>   
> +/* A common structure used to describe mbuf pools per Rx queue */
> +struct rte_eth_rx_mempool {
> +	struct rte_mempool *mp;
> +};

Why do we need it? Can we just use the below:
    struct rte_mempool **rx_mempools;

> +
>   /**
>    * A structure used to configure an Rx ring of an Ethernet port.
>    */
> @@ -1067,6 +1072,23 @@ struct rte_eth_rxconf {
>   	 */
>   	union rte_eth_rxseg *rx_seg;
>   
> +	/**
> +	 * Points to an array of mempools.
> +	 *

It should be highlighted that drivers should take a look at it
if and only if the corresponding offload is enabled for the Rx
queue.

> +	 * This provides support for multiple mbuf pools per Rx queue.
> +	 *
> +	 * This is often useful for saving the memory where the application can
> +	 * create different pools to steer the specific size of the packet, thus
> +	 * enabling effective use of memory.
> +	 *
> +	 * Note that on Rx scatter enable, a packet may be delivered using a chain
> +	 * of mbufs obtained from single mempool or multiple mempools based on
> +	 * the NIC implementation.
> +	 *

Remove extra empty line above.

> +	 */
> +	struct rte_eth_rx_mempool *rx_mempool;
> +	uint16_t rx_npool; /**< Number of mempools */
> +
>   	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
>   };

[snip]

> @@ -1615,6 +1638,7 @@ struct rte_eth_dev_info {
>   	/** Configured number of Rx/Tx queues */
>   	uint16_t nb_rx_queues; /**< Number of Rx queues. */
>   	uint16_t nb_tx_queues; /**< Number of Tx queues. */
> +	uint16_t max_pools;

Description of the new member is missing. Please, add it.

>   	/** Rx parameter recommendations */
>   	struct rte_eth_dev_portconf default_rxportconf;
>   	/** Tx parameter recommendations */


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue
  2022-09-28  9:43       ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-09-28 11:06         ` Thomas Monjalon
  0 siblings, 0 replies; 75+ messages in thread
From: Thomas Monjalon @ 2022-09-28 11:06 UTC (permalink / raw)
  To: Hanumanth Pothula, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Morten Brørup

28/09/2022 11:43, Andrew Rybchenko:
> "Add support for" -> "add support for" or just "support" if
> line is long

Even if the line is not too long, we all prefer shorter lines;
"add support for" should always be replaced by "support" in titles.
Note I often make this change when pulling.





^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v5 1/3] ethdev: support multiple mbuf pools per Rx queue
  2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Hanumanth Pothula
                         ` (2 preceding siblings ...)
  2022-09-28  9:43       ` [PATCH v4 1/3] ethdev: Add support for multiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-10-06 17:01       ` Hanumanth Pothula
  2022-10-06 17:01         ` [PATCH v5 2/3] net/cnxk: " Hanumanth Pothula
                           ` (5 more replies)
  3 siblings, 6 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:01 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

This patch adds support for the multiple mempool capability.
Some HW has support for choosing memory pools based on the
packet's size. The capability allows the PMD to choose a memory
pool based on the packet's length.

This is often useful for saving memory, where the application
can create different pools to steer packets of specific sizes,
thus enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With the multiple mempool capability enabled in SW, an application
may create three pools of different sizes and pass them to the PMD,
allowing the PMD to program HW based on the packet lengths, so that
packets shorter than 2K are received on pool-1, packets with lengths
between 2K and 4K are received on pool-2, and packets greater than 4K
are received on pool-3.
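
For illustration, a minimal application-side sketch of the API added by
this patch (the pool names, sizes, counts and the port_id/socket_id
variables below are hypothetical, not part of the patch):

    struct rte_eth_dev_info dev_info;
    struct rte_eth_rxconf rxconf;
    struct rte_mempool *pools[2];

    rte_eth_dev_info_get(port_id, &dev_info);

    /* One pool for small packets, one for large ones. */
    pools[0] = rte_pktmbuf_pool_create("pool_2k", 8192, 256, 0,
                                       2048 + RTE_PKTMBUF_HEADROOM,
                                       socket_id);
    pools[1] = rte_pktmbuf_pool_create("pool_4k", 4096, 256, 0,
                                       4096 + RTE_PKTMBUF_HEADROOM,
                                       socket_id);

    memset(&rxconf, 0, sizeof(rxconf));
    rxconf.offloads = RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL;
    rxconf.rx_mempools = pools;
    rxconf.rx_npool = RTE_MIN(2, dev_info.max_pools);

    /* The mp argument is NULL; pools come from rx_conf instead. */
    rte_eth_rx_queue_setup(port_id, 0, 512, socket_id, &rxconf, NULL);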

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
v5:
 - Declared memory pools as struct rte_mempool **rx_mempools rather than
   as struct rte_mempool *mp.
 - Added the feature in release notes.
 - Updated conditions and strings as per review comments.
v4:
 - Renamed Offload capability name from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
   to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
 - In struct rte_eth_rxconf, defined new pointer, which holds array of
   type struct rte_eth_rx_mempool(memory pools). This array is used
   by PMD to program multiple mempools.
v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.
---
 doc/guides/rel_notes/release_22_11.rst |  6 +++
 lib/ethdev/rte_ethdev.c                | 74 ++++++++++++++++++++++----
 lib/ethdev/rte_ethdev.h                | 22 ++++++++
 3 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 2e076ba2ad..26ca22efe0 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,12 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added ethdev support for multiple mbuf pools per Rx queue.**
+
+  * Added new Rx offload flag ``RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL`` to support
+    multiple mbuf pools per Rx queue. This capability allows a PMD to choose
+    a memory pool based on the packet's length.
+
 * **Updated Wangxun ngbe driver.**
 
   * Added support to set device link down/up.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..eed4834e6b 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1634,6 +1634,44 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_rx_queue_check_mempool(struct rte_mempool **rx_mempool,
+			       uint16_t n_pool, uint32_t *mbp_buf_size,
+			       const struct rte_eth_dev_info *dev_info)
+{
+	uint16_t pool_idx;
+
+	if (n_pool > dev_info->max_pools) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Too many Rx mempools %u vs maximum %u\n",
+			       n_pool, dev_info->max_pools);
+		return -EINVAL;
+	}
+
+	for (pool_idx = 0; pool_idx < n_pool; pool_idx++) {
+		struct rte_mempool *mpl = rx_mempool[pool_idx];
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null Rx mempool pointer\n");
+			return -EINVAL;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		if (*mbp_buf_size < dev_info->min_rx_bufsize +
+		    RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
+					mpl->name, *mbp_buf_size,
+					RTE_PKTMBUF_HEADROOM + dev_info->min_rx_bufsize,
+					RTE_PKTMBUF_HEADROOM,
+					dev_info->min_rx_bufsize);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1733,9 +1771,12 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 
 	if (mp != NULL) {
 		/* Single pool configuration check. */
-		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
+		if (rx_conf != NULL &&
+		    (((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) &&
+		      rx_conf->rx_nseg != 0) ||
+		     ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) &&
+		      rx_conf->rx_npool != 0))) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Ambiguous segment configuration\n");
+				       "Ambiguous Rx mempools configuration\n");
 			return -EINVAL;
 		}
 		/*
@@ -1763,30 +1804,43 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 				       dev_info.min_rx_bufsize);
 			return -EINVAL;
 		}
-	} else {
+	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
 		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Memory pool is null and no extended configuration provided\n");
+				       "Memory pool is null and no multi-segment configuration provided\n");
 			return -EINVAL;
 		}
 
 		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
-		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
-			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
+		ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
-			if (ret != 0)
-				return ret;
-		} else {
-			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
+		if (ret != 0)
+			return ret;
+	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		/* Extended multi-pool configuration check. */
+		if (rx_conf == NULL || rx_conf->rx_mempools == NULL || rx_conf->rx_npool == 0) {
+			RTE_ETHDEV_LOG(ERR,
+				       "Memory pool is null and no multi-pool configuration provided\n");
 			return -EINVAL;
 		}
+
+		ret = rte_eth_rx_queue_check_mempool(rx_conf->rx_mempools,
+						     rx_conf->rx_npool,
+						     &mbp_buf_size,
+						     &dev_info);
+
+		if (ret != 0)
+			return ret;
+	} else {
+		RTE_ETHDEV_LOG(ERR, "Missing Rx mempool configuration\n");
+		return -EINVAL;
 	}
 
 	/* Use default specified by driver, if nb_rx_desc is zero */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index b62ac5bb6f..306c2b3573 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1067,6 +1067,25 @@ struct rte_eth_rxconf {
 	 */
 	union rte_eth_rxseg *rx_seg;
 
+	/**
+	 * Points to an array of mempools.
+	 *
+	 * Valid only when RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL flag is set in
+	 * Rx offloads.
+	 *
+	 * This provides support for multiple mbuf pools per Rx queue.
+	 *
+	 * This is often useful for saving memory where the application can
+	 * create different pools to steer packets of specific sizes, thus
+	 * enabling effective use of memory.
+	 *
+	 * Note that when Rx scatter is enabled, a packet may be delivered using
+	 * a chain of mbufs obtained from a single mempool or from multiple
+	 * mempools, based on the NIC implementation.
+	 */
+	struct rte_mempool **rx_mempools;
+	uint16_t rx_npool; /**< Number of mempools */
+
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
@@ -1395,6 +1414,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM  RTE_BIT64(18)
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL      RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1615,6 +1635,8 @@ struct rte_eth_dev_info {
 	/** Configured number of Rx/Tx queues */
 	uint16_t nb_rx_queues; /**< Number of Rx queues. */
 	uint16_t nb_tx_queues; /**< Number of Tx queues. */
+	/** Maximum number of pools supported per Rx queue. */
+	uint16_t max_pools;
 	/** Rx parameter recommendations */
 	struct rte_eth_dev_portconf default_rxportconf;
 	/** Tx parameter recommendations */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v5 2/3] net/cnxk: support multiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
@ 2022-10-06 17:01         ` Hanumanth Pothula
  2022-10-06 17:01         ` [PATCH v5 3/3] app/testpmd: " Hanumanth Pothula
                           ` (4 subsequent siblings)
  5 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:01 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, Hanumanth Pothula

Presently, HW is programmed to receive packets only from the LPB pool,
making all packets land in the LPB pool.

But CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the multiple mempool capability; the pool is
selected based on the packet's length. So, basically, the PMD programs
HW to receive packets from both the SPB and LPB pools based on the
packet's length.

This is achieved by enabling the Rx multiple mempool offload,
RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL. This allows the application to pass
more than one pool (in our case, two) to the driver, with different
segment (packet) lengths, which helps the driver configure both
pools based on the segment lengths.

This is often useful for saving memory, where the application
can create different pools to steer packets of specific sizes,
thus enabling effective use of memory.
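
For illustration, an application on cnxk would pass exactly two pools
(CNXK_NIX_NUM_POOLS_MAX); the driver treats the pool with the larger
element size as LPB and the other as SPB. A sketch, with hypothetical
names and sizes, assuming the platform default mempool ops resolve to
cnxk_npa and that an rxconf is being prepared for
rte_eth_rx_queue_setup():

    struct rte_mempool *pools[2];

    pools[0] = rte_pktmbuf_pool_create("spb", 8192, 256, 0,
                                       2048 + RTE_PKTMBUF_HEADROOM,
                                       socket_id);
    pools[1] = rte_pktmbuf_pool_create("lpb", 2048, 256, 0,
                                       16384 + RTE_PKTMBUF_HEADROOM,
                                       socket_id);

    rxconf.offloads |= RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL;
    rxconf.rx_mempools = pools;
    rxconf.rx_npool = 2;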

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 doc/guides/nics/features/cnxk.ini     |  1 +
 doc/guides/nics/features/cnxk_vec.ini |  1 +
 drivers/net/cnxk/cnxk_ethdev.c        | 84 ++++++++++++++++++++++++---
 drivers/net/cnxk/cnxk_ethdev.h        |  4 +-
 drivers/net/cnxk/cnxk_ethdev_ops.c    |  3 +
 5 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini
index 1876fe86c7..ed778ba398 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini b/doc/guides/nics/features/cnxk_vec.ini
index 5d0976e6ce..c2270fe338 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index ce896338d9..6d525036ff 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -541,6 +541,58 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool,
+			 struct rte_mempool **spb_pool)
+{
+	struct rte_mempool *pool0;
+	struct rte_mempool *pool1;
+	struct rte_mempool **mp = rx_conf->rx_mempools;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool ||
+	    rx_conf->rx_npool != CNXK_NIX_NUM_POOLS_MAX) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	if (mp == NULL || mp[0] == NULL || mp[1] == NULL) {
+		plt_err("invalid memory pools\n");
+		return -EINVAL;
+	}
+
+	pool0 = mp[0];
+	pool1 = mp[1];
+
+	if (pool0->elt_size > pool1->elt_size) {
+		*lpb_pool = pool0;
+		*spb_pool = pool1;
+
+	} else {
+		*lpb_pool = pool1;
+		*spb_pool = pool0;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, (*lpb_pool)->elt_size, (*spb_pool)->elt_size);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -557,6 +609,8 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -564,15 +618,21 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -589,13 +649,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -611,17 +671,17 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
 	rq->cqid = cq->qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_pool->elt_size;
 	if (roc_errata_nix_no_meta_aura())
 		rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
@@ -629,6 +689,12 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_pool->elt_size;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -651,7 +717,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -670,7 +736,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index c09e9bff8e..aedbab85b9 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
@@ -83,7 +85,7 @@
 	 RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_SCATTER |    \
 	 RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_RSS_HASH |    \
 	 RTE_ETH_RX_OFFLOAD_TIMESTAMP | RTE_ETH_RX_OFFLOAD_VLAN_STRIP |        \
-	 RTE_ETH_RX_OFFLOAD_SECURITY)
+	 RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL | RTE_ETH_RX_OFFLOAD_SECURITY)
 
 #define RSS_IPV4_ENABLE                                                        \
 	(RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |                            \
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 07c744bf64..bfe9199537 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,9 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->max_pools = CNXK_NIX_NUM_POOLS_MAX;
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v5 3/3] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
  2022-10-06 17:01         ` [PATCH v5 2/3] net/cnxk: " Hanumanth Pothula
@ 2022-10-06 17:01         ` Hanumanth Pothula
  2022-10-06 17:29         ` [PATCH v5 1/3] ethdev: " Stephen Hemminger
                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:01 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram, Hanumanth Pothula

This patch adds support for multiple mempools per Rx queue.
Some HW has support for choosing memory pools based on the
packet's size. The multiple mempool capability allows the PMD to
choose a memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool array
and also print the name of the pool on which each packet is received.
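
For example (illustrative command line; the exact flags depend on how
testpmd is invoked, and 0x200000 assumes RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL
stays at bit 21):

    ./dpdk-testpmd -l 0-1 -n 4 -- -i --mbuf-size=2048,4096 \
        --rxpkts=2048,4096 --rx-offloads=0x200000

Each --mbuf-size entry creates a separate mempool, and the offload bit
makes rx_queue_setup() pass them all via rx_conf->rx_mempools.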

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 44 ++++++++++++++++++++++++++++++------------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 77741fc41f..1dbddf7b43 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2624,11 +2624,13 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
 	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	    (rx_conf->offloads & (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT |
+	     RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL)) == 0) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2637,7 +2639,9 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		goto exit;
 	}
 	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+		struct rte_eth_rxseg_split *rx_split = &rx_useg[i].split;
+		struct rte_mempool *mempool;
+
 		struct rte_mempool *mpx;
 		/*
 		 * Use last valid pool for the segments with number
@@ -2645,16 +2649,32 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/*
+			 * On segment length zero, update the length as
+			 *      buffer size - headroom size
+			 * to make sure there is enough space to accommodate
+			 * the header.
+			 */
+			rx_split->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_split->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_split->mp = mpx ? mpx : mp;
+		}
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+			mempool = mpx ? mpx : mp;
+			rx_mempool[i] = mempool;
+		}
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_npool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index ddf5e21849..15a26171e2 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -82,6 +82,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v5 1/3] ethdev: support multiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
  2022-10-06 17:01         ` [PATCH v5 2/3] net/cnxk: " Hanumanth Pothula
  2022-10-06 17:01         ` [PATCH v5 3/3] app/testpmd: " Hanumanth Pothula
@ 2022-10-06 17:29         ` Stephen Hemminger
  2022-10-07 14:13           ` Andrew Rybchenko
  2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
                           ` (2 subsequent siblings)
  5 siblings, 1 reply; 75+ messages in thread
From: Stephen Hemminger @ 2022-10-06 17:29 UTC (permalink / raw)
  To: Hanumanth Pothula
  Cc: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko, dev, xuan.ding,
	wenxuanx.wu, xiaoyun.li, yuanx.wang, mdr, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, ndabilpuram

On Thu, 6 Oct 2022 22:31:24 +0530
Hanumanth Pothula <hpothula@marvell.com> wrote:

> +	/**
> +	 * Points to an array of mempools.
> +	 *
> +	 * Valid only when RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL flag is set in
> +	 * Rx offloads.
> +	 *
> +	 * This provides support for multiple mbuf pools per Rx queue.
> +	 *
> +	 * This is often useful for saving memory where the application can
> +	 * create different pools to steer packets of specific sizes, thus
> +	 * enabling effective use of memory.
> +	 *
> +	 * Note that when Rx scatter is enabled, a packet may be delivered using
> +	 * a chain of mbufs obtained from a single mempool or from multiple
> +	 * mempools, based on the NIC implementation.
> +	 */
> +	struct rte_mempool **rx_mempools;
> +	uint16_t rx_npool; /**< Number of mempools */
> +
>  	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */

Better and safer to just take up some of those existing reserved fields.
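
I.e. something along these lines (a sketch of the suggestion, not a
tested layout; it keeps the struct size unchanged on 64-bit):

	struct rte_mempool **rx_mempools; /* takes reserved_ptrs[0] */
	uint16_t rx_npool;                /* carved from reserved_64s[0] */

	uint64_t reserved_64s[1];         /* was [2] */
	void *reserved_ptrs[1];           /* was [2] */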


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v6 1/3] ethdev: support multiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
                           ` (2 preceding siblings ...)
  2022-10-06 17:29         ` [PATCH v5 1/3] ethdev: " Stephen Hemminger
@ 2022-10-06 17:53         ` Hanumanth Pothula
  2022-10-06 17:53           ` [PATCH v6 2/3] net/cnxk: " Hanumanth Pothula
                             ` (2 more replies)
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
  5 siblings, 3 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:53 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, wenxuanx.wu, xiaoyun.li, stephen, yuanx.wang,
	mdr, yuying.zhang, qi.z.zhang, viacheslavo, jerinj, ndabilpuram,
	Hanumanth Pothula

This patch adds support for the multiple mempool capability.
Some HW has support for choosing memory pools based on the
packet's size. The capability allows the PMD to choose a memory
pool based on the packet's length.

This is often useful for saving memory, where the application
can create different pools to steer packets of specific sizes,
thus enabling effective use of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With the multiple mempool capability enabled in SW, an application
may create three pools of different sizes and pass them to the PMD,
allowing the PMD to program HW based on the packet lengths, so that
packets shorter than 2K are received on pool-1, packets with lengths
between 2K and 4K are received on pool-2, and packets greater than 4K
are received on pool-3.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v6:
 - Updated release notes, release_22_11.rst.
v5:
 - Declared memory pools as struct rte_mempool **rx_mempools rather than
   as struct rte_mempool *mp.
 - Added the feature in release notes.
 - Updated conditions and strings as per review comments.
v4:
 - Renamed Offload capability name from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
   to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
 - In struct rte_eth_rxconf, defined new pointer, which holds array of
   type struct rte_eth_rx_mempool(memory pools). This array is used
   by PMD to program multiple mempools.
v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.
---
 doc/guides/rel_notes/release_22_11.rst |  6 +++
 lib/ethdev/rte_ethdev.c                | 74 ++++++++++++++++++++++----
 lib/ethdev/rte_ethdev.h                | 22 ++++++++
 3 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 2e076ba2ad..8bb19155d9 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,12 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added ethdev support for multiple mbuf pools per Rx queue.**
+
+  * Added new Rx offload flag ``RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL`` to support
+    multiple mbuf pools per Rx queue. This capability allows a PMD to choose
+    a memory pool based on the packet's length.
+
 * **Updated Wangxun ngbe driver.**
 
   * Added support to set device link down/up.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 1979dc0850..eed4834e6b 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1634,6 +1634,44 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_rx_queue_check_mempool(struct rte_mempool **rx_mempool,
+			       uint16_t n_pool, uint32_t *mbp_buf_size,
+			       const struct rte_eth_dev_info *dev_info)
+{
+	uint16_t pool_idx;
+
+	if (n_pool > dev_info->max_pools) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Too many Rx mempools %u vs maximum %u\n",
+			       n_pool, dev_info->max_pools);
+		return -EINVAL;
+	}
+
+	for (pool_idx = 0; pool_idx < n_pool; pool_idx++) {
+		struct rte_mempool *mpl = rx_mempool[pool_idx];
+
+		if (mpl == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null Rx mempool pointer\n");
+			return -EINVAL;
+		}
+
+		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
+		if (*mbp_buf_size < dev_info->min_rx_bufsize +
+		    RTE_PKTMBUF_HEADROOM) {
+			RTE_ETHDEV_LOG(ERR,
+				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
+					mpl->name, *mbp_buf_size,
+					RTE_PKTMBUF_HEADROOM + dev_info->min_rx_bufsize,
+					RTE_PKTMBUF_HEADROOM,
+					dev_info->min_rx_bufsize);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1733,9 +1771,12 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 
 	if (mp != NULL) {
 		/* Single pool configuration check. */
-		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
+		if (rx_conf != NULL &&
+		    (((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) &&
+		      rx_conf->rx_nseg != 0) ||
+		     ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) &&
+		      rx_conf->rx_npool != 0))) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Ambiguous segment configuration\n");
+				       "Ambiguous Rx mempools configuration\n");
 			return -EINVAL;
 		}
 		/*
@@ -1763,30 +1804,43 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 				       dev_info.min_rx_bufsize);
 			return -EINVAL;
 		}
-	} else {
+	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
 		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Memory pool is null and no extended configuration provided\n");
+				       "Memory pool is null and no multi-segment configuration provided\n");
 			return -EINVAL;
 		}
 
 		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
-		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
-			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
+		ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
-			if (ret != 0)
-				return ret;
-		} else {
-			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
+		if (ret != 0)
+			return ret;
+	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		/* Extended multi-pool configuration check. */
+		if (rx_conf == NULL || rx_conf->rx_mempools == NULL || rx_conf->rx_npool == 0) {
+			RTE_ETHDEV_LOG(ERR,
+				       "Memory pool is null and no multi-pool configuration provided\n");
 			return -EINVAL;
 		}
+
+		ret = rte_eth_rx_queue_check_mempool(rx_conf->rx_mempools,
+						     rx_conf->rx_npool,
+						     &mbp_buf_size,
+						     &dev_info);
+
+		if (ret != 0)
+			return ret;
+	} else {
+		RTE_ETHDEV_LOG(ERR, "Missing Rx mempool configuration\n");
+		return -EINVAL;
 	}
 
 	/* Use default specified by driver, if nb_rx_desc is zero */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index b62ac5bb6f..306c2b3573 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1067,6 +1067,25 @@ struct rte_eth_rxconf {
 	 */
 	union rte_eth_rxseg *rx_seg;
 
+	/**
+	 * Points to an array of mempools.
+	 *
+	 * Valid only when RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL flag is set in
+	 * Rx offloads.
+	 *
+	 * This provides support for multiple mbuf pools per Rx queue.
+	 *
+	 * This is often useful for saving memory where the application can
+	 * create different pools to steer packets of specific sizes, thus
+	 * enabling effective use of memory.
+	 *
+	 * Note that when Rx scatter is enabled, a packet may be delivered using
+	 * a chain of mbufs obtained from a single mempool or from multiple
+	 * mempools, based on the NIC implementation.
+	 */
+	struct rte_mempool **rx_mempools;
+	uint16_t rx_npool; /**< Number of mempools */
+
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
@@ -1395,6 +1414,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM  RTE_BIT64(18)
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL      RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1615,6 +1635,8 @@ struct rte_eth_dev_info {
 	/** Configured number of Rx/Tx queues */
 	uint16_t nb_rx_queues; /**< Number of Rx queues. */
 	uint16_t nb_tx_queues; /**< Number of Tx queues. */
+	/** Maximum number of pools supported per Rx queue. */
+	uint16_t max_pools;
 	/** Rx parameter recommendations */
 	struct rte_eth_dev_portconf default_rxportconf;
 	/** Tx parameter recommendations */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v6 2/3] net/cnxk: support multiple mbuf pools per Rx queue
  2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
@ 2022-10-06 17:53           ` Hanumanth Pothula
  2022-10-06 17:53           ` [PATCH v6 3/3] app/testpmd: " Hanumanth Pothula
  2022-10-06 18:14           ` [PATCH v6 1/3] ethdev: " Hanumanth Reddy Pothula
  2 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:53 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, yuying.zhang,
	qi.z.zhang, viacheslavo, jerinj, Hanumanth Pothula

Presently, HW is programmed to receive packets only from the LPB pool,
making all packets land in the LPB pool.

But CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the multiple mempool capability; the pool is
selected based on the packet's length. So, basically, the PMD programs
HW to receive packets from both the SPB and LPB pools based on the
packet's length.

This is achieved by enabling the Rx multiple mempool offload,
RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL. This allows the application to pass
more than one pool (in our case, two) to the driver, with different
segment (packet) lengths, which helps the driver configure both
pools based on the segment lengths.

This is often useful for saving memory, where the application
can create different pools to steer packets of specific sizes,
thus enabling effective use of memory.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 doc/guides/nics/features/cnxk.ini     |  1 +
 doc/guides/nics/features/cnxk_vec.ini |  1 +
 drivers/net/cnxk/cnxk_ethdev.c        | 84 ++++++++++++++++++++++++---
 drivers/net/cnxk/cnxk_ethdev.h        |  4 +-
 drivers/net/cnxk/cnxk_ethdev_ops.c    |  3 +
 5 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini
index 1876fe86c7..ed778ba398 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini b/doc/guides/nics/features/cnxk_vec.ini
index 5d0976e6ce..c2270fe338 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+multiple mempools    = Y
 Speed capabilities   = Y
 Rx interrupt         = Y
 Lock-free Tx queue   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index ce896338d9..6d525036ff 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -541,6 +541,58 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool,
+			 struct rte_mempool **spb_pool)
+{
+	struct rte_mempool *pool0;
+	struct rte_mempool *pool1;
+	struct rte_mempool **mp = rx_conf->rx_mempools;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool ||
+	    rx_conf->rx_npool != CNXK_NIX_NUM_POOLS_MAX) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	if (mp == NULL || mp[0] == NULL || mp[1] == NULL) {
+		plt_err("invalid memory pools\n");
+		return -EINVAL;
+	}
+
+	pool0 = mp[0];
+	pool1 = mp[1];
+
+	if (pool0->elt_size > pool1->elt_size) {
+		*lpb_pool = pool0;
+		*spb_pool = pool1;
+
+	} else {
+		*lpb_pool = pool1;
+		*spb_pool = pool0;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, (*lpb_pool)->elt_size, (*spb_pool)->elt_size);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -557,6 +609,8 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -564,15 +618,21 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -589,13 +649,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -611,17 +671,17 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
 	rq->cqid = cq->qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_pool->elt_size;
 	if (roc_errata_nix_no_meta_aura())
 		rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
@@ -629,6 +689,12 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_pool->elt_size;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -651,7 +717,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -670,7 +736,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index c09e9bff8e..aedbab85b9 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
@@ -83,7 +85,7 @@
 	 RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_SCATTER |    \
 	 RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_RSS_HASH |    \
 	 RTE_ETH_RX_OFFLOAD_TIMESTAMP | RTE_ETH_RX_OFFLOAD_VLAN_STRIP |        \
-	 RTE_ETH_RX_OFFLOAD_SECURITY)
+	 RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL | RTE_ETH_RX_OFFLOAD_SECURITY)
 
 #define RSS_IPV4_ENABLE                                                        \
 	(RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |                            \
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 07c744bf64..bfe9199537 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,9 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->max_pools = CNXK_NIX_NUM_POOLS_MAX;
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v6 3/3] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
  2022-10-06 17:53           ` [PATCH v6 2/3] net/cnxk: " Hanumanth Pothula
@ 2022-10-06 17:53           ` Hanumanth Pothula
  2022-10-06 18:14           ` [PATCH v6 1/3] ethdev: " Hanumanth Reddy Pothula
  2 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-06 17:53 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, xuan.ding, wenxuanx.wu, thomas,
	xiaoyun.li, stephen, yuanx.wang, mdr, ferruh.yigit, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram, Hanumanth Pothula

This patch adds support for multiple mempools per Rx queue.
Some HW has support for choosing memory pools based on the
packet's size. The multiple mempool capability allows the PMD to
choose a memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool array
and also print the name of the pool on which each packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 44 ++++++++++++++++++++++++++++++------------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 77741fc41f..1dbddf7b43 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2624,11 +2624,13 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
 	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	    (rx_conf->offloads & (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT |
+	     RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL)) == 0) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2637,7 +2639,9 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		goto exit;
 	}
 	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+		struct rte_eth_rxseg_split *rx_split = &rx_useg[i].split;
+		struct rte_mempool *mempool;
+
 		struct rte_mempool *mpx;
 		/*
 		 * Use last valid pool for the segments with number
@@ -2645,16 +2649,32 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/*
+			 * On segment length zero, update the length as
+			 *      buffer size - headroom size
+			 * to make sure there is enough space to accommodate
+			 * the header.
+			 */
+			rx_split->length = rx_pkt_seg_lengths[i] ?
+					   rx_pkt_seg_lengths[i] :
+					   mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_split->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_split->mp = mpx ? mpx : mp;
+		}
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+			mempool = mpx ? mpx : mp;
+			rx_mempool[i] = mempool;
+		}
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_npool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index ddf5e21849..15a26171e2 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -82,6 +82,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [PATCH v6 1/3] ethdev: support multiple mbuf pools per Rx queue
  2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
  2022-10-06 17:53           ` [PATCH v6 2/3] net/cnxk: " Hanumanth Pothula
  2022-10-06 17:53           ` [PATCH v6 3/3] app/testpmd: " Hanumanth Pothula
@ 2022-10-06 18:14           ` Hanumanth Reddy Pothula
  2 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-10-06 18:14 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Thomas Monjalon, Ferruh Yigit, Andrew Rybchenko
  Cc: dev, xuan.ding, xiaoyun.li, stephen, yuanx.wang, mdr,
	yuying.zhang, qi.z.zhang, viacheslavo, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

Hello Andrew/Thomas/Ferruh,

Thank you for your comments. I have taken care of them and uploaded a new
patch set. As tomorrow is the RC1 release, could you please help review the
changes and merge them if they look good?

Regards,
Hanumanth 

> -----Original Message-----
> From: Hanumanth Pothula <hpothula@marvell.com>
> Sent: Thursday, October 6, 2022 11:24 PM
> To: Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; xuan.ding@intel.com; wenxuanx.wu@intel.com;
> xiaoyun.li@intel.com; stephen@networkplumber.org; yuanx.wang@intel.com;
> mdr@ashroe.eu; yuying.zhang@intel.com; qi.z.zhang@intel.com;
> viacheslavo@nvidia.com; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Hanumanth Reddy
> Pothula <hpothula@marvell.com>
> Subject: [PATCH v6 1/3] ethdev: support multiple mbuf pools per Rx queue
> 
> This patch adds support for multiple mempool capability.
> Some of the HW has support for choosing memory pools based on the packet's
> size. The capability allows PMD to choose a memory pool based on the packet's
> length.
> 
> This is often useful for saving the memory where the application can create a
> different pool to steer the specific size of the packet, thus enabling effective use
> of memory.
> 
> For example, let's say HW has a capability of three pools,
>  - pool-1 size is 2K
>  - pool-2 size is > 2K and < 4K
>  - pool-3 size is > 4K
> Here,
>         pool-1 can accommodate packets with sizes < 2K
>         pool-2 can accommodate packets with sizes > 2K and < 4K
>         pool-3 can accommodate packets with sizes > 4K
> 
> With multiple mempool capability enabled in SW, an application may create
> three pools of different sizes and send them to PMD. Allowing PMD to program
> HW based on the packet lengths. So that packets with less than 2K are received
> on pool-1, packets with lengths between 2K and 4K are received on pool-2 and
> finally packets greater than 4K are received on pool-3.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 
> v6:
>  - Updated release notes, release_22_11.rst.
> v5:
>  - Declared memory pools as struct rte_mempool **rx_mempools rather than
>    as struct rte_mempool *mp.
>  - Added the feature in release notes.
>  - Updated conditions and strings as per review comments.
> v4:
>  - Renamed Offload capability name from
> RTE_ETH_RX_OFFLOAD_BUFFER_SORT
>    to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
>  - In struct rte_eth_rxconf, defined new pointer, which holds array of
>    type struct rte_eth_rx_mempool(memory pools). This array is used
>    by PMD to program multiple mempools.
> v3:
>  - Implemented Pool Sort capability as new Rx offload capability,
>    RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
> v2:
>  - Along with spec changes, uploading testpmd and driver changes.
> ---
>  doc/guides/rel_notes/release_22_11.rst |  6 +++
>  lib/ethdev/rte_ethdev.c                | 74 ++++++++++++++++++++++----
>  lib/ethdev/rte_ethdev.h                | 22 ++++++++
>  3 files changed, 92 insertions(+), 10 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_22_11.rst
> b/doc/guides/rel_notes/release_22_11.rst
> index 2e076ba2ad..8bb19155d9 100644
> --- a/doc/guides/rel_notes/release_22_11.rst
> +++ b/doc/guides/rel_notes/release_22_11.rst
> @@ -55,6 +55,12 @@ New Features
>       Also, make sure to start the actual text at the margin.
>       =======================================================
> 
> +* ** Added support ethdev support for mulitiple mbuf pools per Rx
> +queue.**
> +
> +  * Added new Rx offload flag ``RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL`` to
> support
> +    mulitiple mbuf pools per Rx queue. Thisi capability allows PMD to choose
> +    a memory pool based on the packet's length
> +
>  * **Updated Wangxun ngbe driver.**
> 
>    * Added support to set device link down/up.
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 1979dc0850..eed4834e6b 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1634,6 +1634,44 @@ rte_eth_dev_is_removed(uint16_t port_id)
>  	return ret;
>  }
> 
> +static int
> +rte_eth_rx_queue_check_mempool(struct rte_mempool **rx_mempool,
> +			       uint16_t n_pool, uint32_t *mbp_buf_size,
> +			       const struct rte_eth_dev_info *dev_info)
> +{
> +	uint16_t pool_idx;
> +
> +	if (n_pool > dev_info->max_pools) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Too many Rx mempools %u vs maximum %u\n",
> +			       n_pool, dev_info->max_pools);
> +		return -EINVAL;
> +	}
> +
> +	for (pool_idx = 0; pool_idx < n_pool; pool_idx++) {
> +		struct rte_mempool *mpl = rx_mempool[pool_idx];
> +
> +		if (mpl == NULL) {
> +			RTE_ETHDEV_LOG(ERR, "null Rx mempool pointer\n");
> +			return -EINVAL;
> +		}
> +
> +		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
> +		if (*mbp_buf_size < dev_info->min_rx_bufsize +
> +		    RTE_PKTMBUF_HEADROOM) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
> +				       mpl->name, *mbp_buf_size,
> +				       RTE_PKTMBUF_HEADROOM + dev_info->min_rx_bufsize,
> +				       RTE_PKTMBUF_HEADROOM,
> +				       dev_info->min_rx_bufsize);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static int
>  rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>  			     uint16_t n_seg, uint32_t *mbp_buf_size,
> @@ -1733,9 +1771,12 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> 
>  	if (mp != NULL) {
>  		/* Single pool configuration check. */
> -		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
> +		if (((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) &&
> +		    rx_conf != NULL && rx_conf->rx_nseg != 0) ||
> +		   ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) &&
> +		    rx_conf != NULL && rx_conf->rx_npool != 0)) {
>  			RTE_ETHDEV_LOG(ERR,
> -				       "Ambiguous segment configuration\n");
> +				       "Ambiguous Rx mempools configuration\n");
>  			return -EINVAL;
>  		}
>  		/*
> @@ -1763,30 +1804,43 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>  				       dev_info.min_rx_bufsize);
>  			return -EINVAL;
>  		}
> -	} else {
> +	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>  		const struct rte_eth_rxseg_split *rx_seg;
>  		uint16_t n_seg;
> 
>  		/* Extended multi-segment configuration check. */
>  		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
>  			RTE_ETHDEV_LOG(ERR,
> -				       "Memory pool is null and no extended configuration provided\n");
> +				       "Memory pool is null and no multi-segment configuration provided\n");
>  			return -EINVAL;
>  		}
> 
>  		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
>  		n_seg = rx_conf->rx_nseg;
> 
> -		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> -			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
> +		ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
>  							   &mbp_buf_size,
>  							   &dev_info);
> -			if (ret != 0)
> -				return ret;
> -		} else {
> -			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
> +		if (ret != 0)
> +			return ret;
> +	} else if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL) {
> +		/* Extended multi-pool configuration check. */
> +		if (rx_conf == NULL || rx_conf->rx_mempools == NULL || rx_conf->rx_npool == 0) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "Memory pool is null and no multi-pool configuration provided\n");
>  			return -EINVAL;
>  		}
> +
> +		ret = rte_eth_rx_queue_check_mempool(rx_conf->rx_mempools,
> +						     rx_conf->rx_npool,
> +						     &mbp_buf_size,
> +						     &dev_info);
> +
> +		if (ret != 0)
> +			return ret;
> +	} else {
> +		RTE_ETHDEV_LOG(ERR, "Missing Rx mempool configuration\n");
> +		return -EINVAL;
>  	}
> 
>  	/* Use default specified by driver, if nb_rx_desc is zero */
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index b62ac5bb6f..306c2b3573 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1067,6 +1067,25 @@ struct rte_eth_rxconf {
>  	 */
>  	union rte_eth_rxseg *rx_seg;
> 
> +	/**
> +	 * Points to an array of mempools.
> +	 *
> +	 * Valid only when RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL flag is set in
> +	 * Rx offloads.
> +	 *
> +	 * This provides support for multiple mbuf pools per Rx queue.
> +	 *
> +	 * This is often useful for saving the memory where the application can
> +	 * create a different pool to steer the specific size of the packet, thus
> +	 * enabling effective use of memory.
> +	 *
> +	 * Note that on Rx scatter enable, a packet may be delivered using a chain
> +	 * of mbufs obtained from single mempool or multiple mempools based on
> +	 * the NIC implementation.
> +	 */
> +	struct rte_mempool **rx_mempools;
> +	uint16_t rx_npool; /**< Number of mempools */
> +
>  	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
> @@ -1395,6 +1414,7 @@ struct rte_eth_conf {
>  #define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM  RTE_BIT64(18)
>  #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
>  #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
> +#define RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL      RTE_BIT64(21)
> 
>  #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
>  				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
> @@ -1615,6 +1635,8 @@ struct rte_eth_dev_info {
>  	/** Configured number of Rx/Tx queues */
>  	uint16_t nb_rx_queues; /**< Number of Rx queues. */
>  	uint16_t nb_tx_queues; /**< Number of Tx queues. */
> +	/** Maximum number of pools supported per Rx queue. */
> +	uint16_t max_pools;
>  	/** Rx parameter recommendations */
>  	struct rte_eth_dev_portconf default_rxportconf;
>  	/** Tx parameter recommendations */
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v5 1/3] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-06 17:29         ` [PATCH v5 1/3] ethdev: " Stephen Hemminger
@ 2022-10-07 14:13           ` Andrew Rybchenko
  0 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:13 UTC (permalink / raw)
  To: Stephen Hemminger, Hanumanth Pothula
  Cc: Thomas Monjalon, Ferruh Yigit, dev, xuan.ding, wenxuanx.wu,
	xiaoyun.li, yuanx.wang, mdr, yuying.zhang, qi.z.zhang,
	viacheslavo, jerinj, ndabilpuram

On 10/6/22 20:29, Stephen Hemminger wrote:
> On Thu, 6 Oct 2022 22:31:24 +0530
> Hanumanth Pothula <hpothula@marvell.com> wrote:
> 
>> +	/**
>> +	 * Points to an array of mempools.
>> +	 *
>> +	 * Valid only when RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL flag is set in
>> +	 * Rx offloads.
>> +	 *
>> +	 * This provides support for  multiple mbuf pools per Rx queue.
>> +	 *
>> +	 * This is often useful for saving the memory where the application can
>> +	 * create a different pools to steer the specific size of the packet, thus
>> +	 * enabling effective use of memory.
>> +	 *
>> +	 * Note that on Rx scatter enable, a packet may be delivered using a chain
>> +	 * of mbufs obtained from single mempool or multiple mempools based on
>> +	 * the NIC implementation.
>> +	 */
>> +	struct rte_mempool **rx_mempools;
>> +	uint16_t rx_npool; /** < number of mempools */
>> +
>>   	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
> 
> Better and safer to just take up some of those existing reserved fields.
> 

I don't understand why. We're breaking ABI anyway.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v7 0/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
                           ` (3 preceding siblings ...)
  2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
@ 2022-10-07 14:37         ` Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
                             ` (3 more replies)
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
  5 siblings, 4 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:37 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit; +Cc: dev

I'm not sure about the testpmd patch. Review would be useful and maybe
we should postpone it to rc2.

v7:
 - Drop RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL offload which seems to be
   unnecessary. A positive max_rx_mempools in dev_info is sufficient to
   indicate that the capability is supported and a positive number of
   mempools in the Rx configuration is sufficient to request it.
 - Add helper patch to factor out Rx mempool check to be shared
   for single mempool, buffer split and multiple mempools case.
 - Refine the check so that one and only one way to provide Rx buffers
   is given: either a single mempool, buffer split, or multiple mempools.
 - Drop feature advertisement in net/cnxk patch since there is no
   such feature defined yet. I have no strong opinion if a new feature
   is required or not.
v6:
 - Updated release notes, release_22_11.rst.
v5:
 - Declared memory pools as struct rte_mempool **rx_mempools rather than
   as struct rte_mempool *mp.
 - Added the feature in release notes.
 - Updated conditions and strings as per review comments.
v4:
 - Renamed Offload capability name from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
   to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
 - In struct rte_eth_rxconf, defined a new pointer which holds an array
   of type struct rte_eth_rx_mempool (memory pools). This array is used
   by PMD to program multiple mempools.
v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.

Andrew Rybchenko (1):
  ethdev: factor out helper function to check Rx mempool

Hanumanth Pothula (3):
  ethdev: support mulitiple mbuf pools per Rx queue
  net/cnxk: support mulitiple mbuf pools per Rx queue
  app/testpmd: support mulitiple mbuf pools per Rx queue

 app/test-pmd/testpmd.c                 |  34 ++++--
 app/test-pmd/testpmd.h                 |   3 +
 app/test-pmd/util.c                    |   4 +-
 doc/guides/rel_notes/release_22_11.rst |   6 +
 drivers/net/cnxk/cnxk_ethdev.c         |  84 +++++++++++--
 drivers/net/cnxk/cnxk_ethdev.h         |   2 +
 drivers/net/cnxk/cnxk_ethdev_ops.c     |   3 +
 lib/ethdev/rte_ethdev.c                | 157 +++++++++++++++++--------
 lib/ethdev/rte_ethdev.h                |  25 ++++
 9 files changed, 249 insertions(+), 69 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v7 1/4] ethdev: factor out helper function to check Rx mempool
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
@ 2022-10-07 14:37           ` Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue Andrew Rybchenko
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:37 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit; +Cc: dev

Factor out the Rx mempool checks into a helper to avoid duplicated logic.
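
For illustration only (not part of the patch), a numeric sketch of the
invariant the new helper enforces; the pool parameters below are
hypothetical:

    /* The (internal) helper requires:
     *     rte_pktmbuf_data_room_size(mp) >= offset + min_length
     * e.g. with offset = RTE_PKTMBUF_HEADROOM = 128 and min_length =
     * min_rx_bufsize = 64, a pool with a 2048-byte data room passes
     * (2048 >= 192), while a 128-byte data room fails with -EINVAL.
     */
    struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192,
                                                     256, 0, 2048,
                                                     rte_socket_id());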

Signed-off-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
 lib/ethdev/rte_ethdev.c | 82 +++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 01fac713a2..b3dba291e7 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1648,6 +1648,36 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_check_rx_mempool(struct rte_mempool *mp, uint16_t offset,
+			 uint16_t min_length)
+{
+	uint16_t data_room_size;
+
+	/*
+	 * Check the size of the mbuf data buffer, this value
+	 * must be provided in the private data of the memory pool.
+	 * First check that the memory pool(s) has a valid private data.
+	 */
+	if (mp->private_data_size <
+			sizeof(struct rte_pktmbuf_pool_private)) {
+		RTE_ETHDEV_LOG(ERR, "%s private_data_size %u < %u\n",
+			mp->name, mp->private_data_size,
+			(unsigned int)
+			sizeof(struct rte_pktmbuf_pool_private));
+		return -ENOSPC;
+	}
+	data_room_size = rte_pktmbuf_data_room_size(mp);
+	if (data_room_size < offset + min_length) {
+		RTE_ETHDEV_LOG(ERR,
+			       "%s mbuf_data_room_size %u < %u (%u + %u)\n",
+			       mp->name, data_room_size,
+			       offset + min_length, offset, min_length);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1657,6 +1687,7 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 	struct rte_mempool *mp_first;
 	uint32_t offset_mask;
 	uint16_t seg_idx;
+	int ret;
 
 	if (n_seg > seg_capa->max_nseg) {
 		RTE_ETHDEV_LOG(ERR,
@@ -1696,25 +1727,14 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 				return -EINVAL;
 			}
 		}
-		if (mpl->private_data_size <
-			sizeof(struct rte_pktmbuf_pool_private)) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s private_data_size %u < %u\n",
-				       mpl->name, mpl->private_data_size,
-				       (unsigned int)sizeof
-					(struct rte_pktmbuf_pool_private));
-			return -ENOSPC;
-		}
+
 		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
 		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
 		length = length != 0 ? length : *mbp_buf_size;
-		if (*mbp_buf_size < length + offset) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
-				       mpl->name, *mbp_buf_size,
-				       length + offset, length, offset);
-			return -EINVAL;
-		}
+
+		ret = rte_eth_check_rx_mempool(mpl, offset, length);
+		if (ret != 0)
+			return ret;
 	}
 	return 0;
 }
@@ -1753,31 +1773,13 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 				       "Ambiguous segment configuration\n");
 			return -EINVAL;
 		}
-		/*
-		 * Check the size of the mbuf data buffer, this value
-		 * must be provided in the private data of the memory pool.
-		 * First check that the memory pool(s) has a valid private data.
-		 */
-		if (mp->private_data_size <
-				sizeof(struct rte_pktmbuf_pool_private)) {
-			RTE_ETHDEV_LOG(ERR, "%s private_data_size %u < %u\n",
-				mp->name, mp->private_data_size,
-				(unsigned int)
-				sizeof(struct rte_pktmbuf_pool_private));
-			return -ENOSPC;
-		}
+
+		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
+					       dev_info.min_rx_bufsize);
+		if (ret != 0)
+			return ret;
+
 		mbp_buf_size = rte_pktmbuf_data_room_size(mp);
-		if (mbp_buf_size < dev_info.min_rx_bufsize +
-				   RTE_PKTMBUF_HEADROOM) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
-				       mp->name, mbp_buf_size,
-				       RTE_PKTMBUF_HEADROOM +
-				       dev_info.min_rx_bufsize,
-				       RTE_PKTMBUF_HEADROOM,
-				       dev_info.min_rx_bufsize);
-			return -EINVAL;
-		}
 	} else {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
@ 2022-10-07 14:37           ` Andrew Rybchenko
  2022-10-07 16:08             ` Thomas Monjalon
  2022-10-07 14:37           ` [PATCH v7 3/4] net/cnxk: " Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 4/4] app/testpmd: " Andrew Rybchenko
  3 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:37 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit; +Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

Some of the HW has support for choosing memory pools based on the
packet's size. The capability allows to choose a memory pool based
on the packet's length.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling more efficient usage of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With multiple mempool capability enabled in SW, an application may
create three pools of different sizes and send them to PMD, allowing
PMD to program HW based on the packet lengths, so that packets with
less than 2K are received on pool-1, packets with lengths between 2K
and 4K are received on pool-2 and finally packets greater than 4K
are received on pool-3.
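
For illustration (not part of the patch), a minimal sketch of how an
application might use the proposed API; the pool names, sizes,
descriptor count and the port_id variable are hypothetical:

    struct rte_eth_rxconf rxconf = {0};
    struct rte_mempool *pools[2];

    /* Hypothetical pools: small buffers for short packets, large
     * buffers for long ones. */
    pools[0] = rte_pktmbuf_pool_create("rx_small", 8192, 256, 0,
                                       2048, rte_socket_id());
    pools[1] = rte_pktmbuf_pool_create("rx_large", 2048, 256, 0,
                                       8192, rte_socket_id());

    rxconf.rx_mempools = pools;
    rxconf.rx_nmempool = 2;
    /* The mp argument is NULL: exactly one source of Rx buffers
     * may be given. */
    rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                           &rxconf, NULL);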

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
Signed-off-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
 doc/guides/rel_notes/release_22_11.rst |  6 ++
 lib/ethdev/rte_ethdev.c                | 81 ++++++++++++++++++++++----
 lib/ethdev/rte_ethdev.h                | 25 ++++++++
 3 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index e165c45367..fa830d325c 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -92,6 +92,12 @@ New Features
   ``rte_eth_cman_config_set()``, ``rte_eth_cman_info_get()``
   to support congestion management.
 
+* **Added support for mulitiple mbuf pools per ethdev Rx queue.**
+
+  * Added support for multiple mbuf pools per Rx queue. The capability allows
+    application to provide many mempools of different size and PMD to choose
+    a memory pool based on the packet's length and/or Rx buffers availability.
+
 * **Updated Intel iavf driver.**
 
   * Added flow subscription support.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index b3dba291e7..6026cf4f98 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1739,6 +1739,41 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 	return 0;
 }
 
+static int
+rte_eth_rx_queue_check_mempools(struct rte_mempool **rx_mempools,
+			       uint16_t n_mempools, uint32_t *min_buf_size,
+			       const struct rte_eth_dev_info *dev_info)
+{
+	uint16_t pool_idx;
+	int ret;
+
+	if (n_mempools > dev_info->max_rx_mempools) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Too many Rx mempools %u vs maximum %u\n",
+			       n_mempools, dev_info->max_rx_mempools);
+		return -EINVAL;
+	}
+
+	for (pool_idx = 0; pool_idx < n_mempools; pool_idx++) {
+		struct rte_mempool *mp = rx_mempools[pool_idx];
+
+		if (mp == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null Rx mempool pointer\n");
+			return -EINVAL;
+		}
+
+		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
+					       dev_info->min_rx_bufsize);
+		if (ret != 0)
+			return ret;
+
+		*min_buf_size = RTE_MIN(*min_buf_size,
+					rte_pktmbuf_data_room_size(mp));
+	}
+
+	return 0;
+}
+
 int
 rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		       uint16_t nb_rx_desc, unsigned int socket_id,
@@ -1746,7 +1781,8 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		       struct rte_mempool *mp)
 {
 	int ret;
-	uint32_t mbp_buf_size;
+	uint64_t rx_offloads;
+	uint32_t mbp_buf_size = UINT32_MAX;
 	struct rte_eth_dev *dev;
 	struct rte_eth_dev_info dev_info;
 	struct rte_eth_rxconf local_conf;
@@ -1766,35 +1802,42 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	if (ret != 0)
 		return ret;
 
+	rx_offloads = dev->data->dev_conf.rxmode.offloads;
+	if (rx_conf != NULL)
+		rx_offloads |= rx_conf->offloads;
+
+	/* Ensure that we have one and only one source of Rx buffers */
+	if ((mp != NULL) +
+	    (rx_conf != NULL && rx_conf->rx_nseg > 0) +
+	    (rx_conf != NULL && rx_conf->rx_nmempool > 0) != 1) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Ambiguous Rx mempools configuration\n");
+		return -EINVAL;
+	}
+
 	if (mp != NULL) {
 		/* Single pool configuration check. */
-		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
-			RTE_ETHDEV_LOG(ERR,
-				       "Ambiguous segment configuration\n");
-			return -EINVAL;
-		}
-
 		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
 					       dev_info.min_rx_bufsize);
 		if (ret != 0)
 			return ret;
 
 		mbp_buf_size = rte_pktmbuf_data_room_size(mp);
-	} else {
+	} else if (rx_conf == NULL || rx_conf->rx_nseg > 0) {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
-		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
+		if (rx_conf->rx_seg == NULL) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Memory pool is null and no extended configuration provided\n");
+				       "Memory pool is null and no multi-segment configuration provided\n");
 			return -EINVAL;
 		}
 
 		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
-		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		if (rx_offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
 			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
@@ -1804,6 +1847,22 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
 			return -EINVAL;
 		}
+	} else if (rx_conf == NULL || rx_conf->rx_nmempool > 0) {
+		/* Extended multi-pool configuration check. */
+		if (rx_conf->rx_mempools == NULL) {
+			RTE_ETHDEV_LOG(ERR, "Memory pools array is null\n");
+			return -EINVAL;
+		}
+
+		ret = rte_eth_rx_queue_check_mempools(rx_conf->rx_mempools,
+						     rx_conf->rx_nmempool,
+						     &mbp_buf_size,
+						     &dev_info);
+		if (ret != 0)
+			return ret;
+	} else {
+		RTE_ETHDEV_LOG(ERR, "Missing Rx mempool configuration\n");
+		return -EINVAL;
 	}
 
 	/* Use default specified by driver, if nb_rx_desc is zero */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 2530eda7c4..7295aa942e 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1067,6 +1067,24 @@ struct rte_eth_rxconf {
 	 */
 	union rte_eth_rxseg *rx_seg;
 
+	/**
+	 * Array of mempools to allocate Rx buffers from.
+	 *
+	 * This provides support for multiple mbuf pools per Rx queue.
+	 * The capability is reported in device info via positive
+	 * max_rx_mempools.
+	 *
+	 * It could be useful for more efficient usage of memory when an
+	 * application creates different mempools to steer the specific
+	 * size of the packet.
+	 *
+	 * Note that if Rx scatter is enabled, a packet may be delivered using
+	 * a chain of mbufs obtained from single mempool or multiple mempools
+	 * based on the NIC implementation.
+	 */
+	struct rte_mempool **rx_mempools;
+	uint16_t rx_nmempool; /** < Number of Rx mempools */
+
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
@@ -1614,6 +1632,13 @@ struct rte_eth_dev_info {
 	/** Configured number of Rx/Tx queues */
 	uint16_t nb_rx_queues; /**< Number of Rx queues. */
 	uint16_t nb_tx_queues; /**< Number of Tx queues. */
+	/**
+	 * Maximum number of Rx mempools supported per Rx queue.
+	 *
+	 * Value greater than 0 means that the driver supports Rx queue
+	 * mempools specification via rx_conf->rx_mempools.
+	 */
+	uint16_t max_rx_mempools;
 	/** Rx parameter recommendations */
 	struct rte_eth_dev_portconf default_rxportconf;
 	/** Tx parameter recommendations */
-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v7 3/4] net/cnxk: support mulitiple mbuf pools per Rx queue
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-10-07 14:37           ` Andrew Rybchenko
  2022-10-07 14:37           ` [PATCH v7 4/4] app/testpmd: " Andrew Rybchenko
  3 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:37 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

Presently, HW is programmed to receive packets only from the LPB pool,
so all packets are received from the LPB pool.

But, CNXK HW supports two pools,
 - SPB -> packets with smaller size (less than 4K)
 - LPB -> packets with bigger size (greater than 4K)

This patch enables the multiple mempool capability: the pool is selected
based on the packet's length. So, basically, PMD programs HW for receiving
packets from both SPB and LPB pools based on the packet's length.

This is achieved by enabling the Rx multiple mempool offload,
RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL. This allows the application to send
more than one pool (in our case two) to the driver, with different
segment (packet) lengths, which helps the driver to configure both
pools based on segment lengths.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling effective use of memory.
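
As an illustrative sketch (not from the patch), the application may pass
the two pools in either order, since the driver compares elt_size to
decide which is LPB and which is SPB; the pools below are assumed to
exist with 16K and 2K buffers and to use cnxk_npa mempool ops:

    /* Order does not matter: cnxk_nix_process_rx_conf() assigns the
     * pool with the larger elt_size as LPB and the smaller as SPB. */
    struct rte_mempool *pools[2] = { lpb_16k_pool, spb_2k_pool };

    rxconf.rx_mempools = pools;
    rxconf.rx_nmempool = 2;    /* CNXK_NIX_NUM_POOLS_MAX */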

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 drivers/net/cnxk/cnxk_ethdev.c     | 84 ++++++++++++++++++++++++++----
 drivers/net/cnxk/cnxk_ethdev.h     |  2 +
 drivers/net/cnxk/cnxk_ethdev_ops.c |  3 ++
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 2cb48ba152..bb27cc87fd 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -541,6 +541,58 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool,
+			 struct rte_mempool **spb_pool)
+{
+	struct rte_mempool *pool0;
+	struct rte_mempool *pool1;
+	struct rte_mempool **mp = rx_conf->rx_mempools;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool ||
+	    rx_conf->rx_nmempool != CNXK_NIX_NUM_POOLS_MAX) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	if (mp == NULL || mp[0] == NULL || mp[1] == NULL) {
+		plt_err("invalid memory pools\n");
+		return -EINVAL;
+	}
+
+	pool0 = mp[0];
+	pool1 = mp[1];
+
+	if (pool0->elt_size > pool1->elt_size) {
+		*lpb_pool = pool0;
+		*spb_pool = pool1;
+
+	} else {
+		*lpb_pool = pool1;
+		*spb_pool = pool0;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, (*lpb_pool)->elt_size, (*spb_pool)->elt_size);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -557,6 +609,8 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -564,15 +618,21 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (rx_conf->rx_nmempool > 0) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -589,13 +649,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -611,17 +671,17 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
 	rq->cqid = cq->qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_pool->elt_size;
 	if (roc_errata_nix_no_meta_aura())
 		rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
@@ -629,6 +689,12 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_pool->elt_size;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -651,7 +717,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -670,7 +736,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 5204c46244..d282f79a9a 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 30d169f799..8f7287161b 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,9 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->max_rx_mempools = CNXK_NIX_NUM_POOLS_MAX;
+
 	return 0;
 }
 
-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v7 4/4] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
                             ` (2 preceding siblings ...)
  2022-10-07 14:37           ` [PATCH v7 3/4] net/cnxk: " Andrew Rybchenko
@ 2022-10-07 14:37           ` Andrew Rybchenko
  3 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 14:37 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang; +Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

This patch adds support for multiple mempools. Some of the HW has
support for choosing memory pools based on the packet's size. The
multiple mempool capability allows PMD to choose a memory pool based
on the packet's length.

When multiple mempool support is enabled, populate the mempool array
and also print the name of the pool on which each packet is received,
as illustrated below.
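
For illustration, a hypothetical verbose-output line built from the new
format string in util.c below (all field values are made up):

     - pool=rx_small - type=0x0800 - length=64 - nb_segs=1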

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 34 ++++++++++++++++++++++++----------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index de6ad00138..2ce9953c76 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2624,6 +2624,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
@@ -2645,16 +2646,29 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/**
+			 * On segment length zero, update length as
+			 *      buffer size - headroom size
+			 * to make sure enough space is accommodated for the header.
+			 */
+			rx_seg->length = rx_pkt_seg_lengths[i] ?
+					 rx_pkt_seg_lengths[i] :
+					 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					 rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+		} else {
+			rx_mempool[i] = mpx ? mpx : mp;
+		}
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index cbbc7cc350..824de36ef1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 14:37           ` [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-10-07 16:08             ` Thomas Monjalon
  2022-10-07 16:18               ` Stephen Hemminger
  2022-10-07 17:30               ` Andrew Rybchenko
  0 siblings, 2 replies; 75+ messages in thread
From: Thomas Monjalon @ 2022-10-07 16:08 UTC (permalink / raw)
  To: Hanumanth Pothula, Andrew Rybchenko; +Cc: Ferruh Yigit, dev

07/10/2022 16:37, Andrew Rybchenko:
> From: Hanumanth Pothula <hpothula@marvell.com>
> 
> Some of the HW has support for choosing memory pools based on the
> packet's size. The capability allows to choose a memory pool based
> on the packet's length.

The second sentence is redundant.

> This is often useful for saving the memory where the application
> can create a different pool to steer the specific size of the
> packet, thus enabling more efficient usage of memory.
[...]
> +* **Added support for mulitiple mbuf pools per ethdev Rx queue.**

mulitiple -> multiple

> +
> +  * Added support for multiple mbuf pools per Rx queue. The capability allows

No need to repeat the title.

> +    application to provide many mempools of different size and PMD to choose
> +    a memory pool based on the packet's length and/or Rx buffers availability.
[...]
> +	/* Ensure that we have one and only one source of Rx buffers */
> +	if ((mp != NULL) +

+ operator?
Are we sure a boolean is always translated as 1?

> +	    (rx_conf != NULL && rx_conf->rx_nseg > 0) +
> +	    (rx_conf != NULL && rx_conf->rx_nmempool > 0) != 1) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Ambiguous Rx mempools configuration\n");
> +		return -EINVAL;
> +	}
[...]
> @@ -1067,6 +1067,24 @@ struct rte_eth_rxconf {
>  	 */
>  	union rte_eth_rxseg *rx_seg;
>  
> +	/**
> +	 * Array of mempools to allocate Rx buffers from.
> +	 *
> +	 * This provides support for multiple mbuf pools per Rx queue.
> +	 * The capability is reported in device info via positive
> +	 * max_rx_mempools.
> +	 *
> +	 * It could be useful for more efficient usage of memory when an
> +	 * application creates different mempools to steer the specific
> +	 * size of the packet.
> +	 *
> +	 * Note that if Rx scatter is enabled, a packet may be delivered using
> +	 * a chain of mbufs obtained from single mempool or multiple mempools
> +	 * based on the NIC implementation.
> +	 */
> +	struct rte_mempool **rx_mempools;
> +	uint16_t rx_nmempool; /** < Number of Rx mempools */

The commit message suggests a configuration per packet size.
I guess it is not configurable in ethdev API?
If it is hard-configured in the HW or the driver only,
it should be specified here.

[...]
> +	/**
> +	 * Maximum number of Rx mempools supported per Rx queue.
> +	 *
> +	 * Value greater than 0 means that the driver supports Rx queue
> +	 * mempools specification via rx_conf->rx_mempools.
> +	 */
> +	uint16_t max_rx_mempools;




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 16:08             ` Thomas Monjalon
@ 2022-10-07 16:18               ` Stephen Hemminger
  2022-10-07 16:20                 ` Stephen Hemminger
  2022-10-07 17:30               ` Andrew Rybchenko
  1 sibling, 1 reply; 75+ messages in thread
From: Stephen Hemminger @ 2022-10-07 16:18 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Hanumanth Pothula, Andrew Rybchenko, Ferruh Yigit, dev

On Fri, 07 Oct 2022 18:08:57 +0200
Thomas Monjalon <thomas@monjalon.net> wrote:

> > +	/* Ensure that we have one and only one source of Rx buffers */
> > +	if ((mp != NULL) +  
> 
> + operator?
> Are we sure a boolean is always translated as 1?

Yes, it is likely part of C standard.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 16:18               ` Stephen Hemminger
@ 2022-10-07 16:20                 ` Stephen Hemminger
  2022-10-07 16:33                   ` Andrew Rybchenko
  0 siblings, 1 reply; 75+ messages in thread
From: Stephen Hemminger @ 2022-10-07 16:20 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Hanumanth Pothula, Andrew Rybchenko, Ferruh Yigit, dev

On Fri, 7 Oct 2022 09:18:14 -0700
Stephen Hemminger <stephen@networkplumber.org> wrote:

> On Fri, 07 Oct 2022 18:08:57 +0200
> Thomas Monjalon <thomas@monjalon.net> wrote:
> 
> > > +	/* Ensure that we have one and only one source of Rx buffers */
> > > +	if ((mp != NULL) +    
> > 
> > + operator?
> > Are we sure a boolean is always translated as 1?  
> 
> Yes, it is likely part of C standard.


Found it: https://en.cppreference.com/w/c/language/operator_comparison
	The type of any equality operator expression is int, and its value (which is not an lvalue) is 1
	when the specified relationship holds true and 0 when the specified relationship does not hold.
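
A minimal standalone demonstration of the point (illustrative; it
mirrors the one-and-only-one check from the patch):

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
        void *mp = NULL;
        int nseg = 0, nmempool = 2;

        /* Each comparison is an int that is exactly 0 or 1
         * (C11 6.5.8p6, 6.5.9p3), so the sum counts how many of
         * the three buffer sources were supplied. */
        int sources = (mp != NULL) + (nseg > 0) + (nmempool > 0);

        assert(sources == 1);    /* exactly one source given */
        return 0;
    }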

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 16:20                 ` Stephen Hemminger
@ 2022-10-07 16:33                   ` Andrew Rybchenko
  0 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 16:33 UTC (permalink / raw)
  To: Stephen Hemminger, Thomas Monjalon; +Cc: Hanumanth Pothula, Ferruh Yigit, dev

On 10/7/22 19:20, Stephen Hemminger wrote:
> On Fri, 7 Oct 2022 09:18:14 -0700
> Stephen Hemminger <stephen@networkplumber.org> wrote:
> 
>> On Fri, 07 Oct 2022 18:08:57 +0200
>> Thomas Monjalon <thomas@monjalon.net> wrote:
>>
>>>> +	/* Ensure that we have one and only one source of Rx buffers */
>>>> +	if ((mp != NULL) +
>>>
>>> + operator?
>>> Are we sure a boolean is always translated as 1?
>>
>> Yes, it is likely part of C standard.
> 
> 
> Found it: https://en.cppreference.com/w/c/language/operator_comparison
> 	The type of any equality operator expression is int, and its value (which is not an lvalue) is 1
> 	when the specified relationship holds true and ​0​ when the specified relationship does not hold.

Many thanks, Stephen.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v8 0/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
                           ` (4 preceding siblings ...)
  2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
@ 2022-10-07 17:29         ` Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
                             ` (4 more replies)
  5 siblings, 5 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:29 UTC (permalink / raw)
  To: Hanumanth Pothula, Thomas Monjalon, Ferruh Yigit; +Cc: dev

I'm not sure about the testpmd patch. Review would be useful and maybe
we should postpone it to rc2.

v8:
 - Process review notes
v7:
 - Drop RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL offload which seems to be
   unnecessary. A positive max_rx_mempools in dev_info is sufficient to
   indicate that the capability is supported and a positive number of
   mempools in the Rx configuration is sufficient to request it.
 - Add helper patch to factor out Rx mempool check to be shared
   for single mempool, buffer split and multiple mempools case.
 - Refine the check so that one and only one way to provide Rx buffers
   is given: either a single mempool, buffer split, or multiple mempools.
 - Drop feature advertisement in net/cnxk patch since there is no
   such feature defined yet. I have no strong opinion if a new feature
   is required or not.
v6:
 - Updated release notes, release_22_11.rst.
v5:
 - Declared memory pools as struct rte_mempool **rx_mempools rather than
   as struct rte_mempool *mp.
 - Added the feature in release notes.
 - Updated conditions and strings as per review comments.
v4:
 - Renamed Offload capability name from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
   to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
 - In struct rte_eth_rxconf, defined a new pointer which holds an array
   of type struct rte_eth_rx_mempool (memory pools). This array is used
   by PMD to program multiple mempools.
v3:
 - Implemented Pool Sort capability as new Rx offload capability,
   RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
v2:
 - Along with spec changes, uploading testpmd and driver changes.

Andrew Rybchenko (1):
  ethdev: factor out helper function to check Rx mempool

Hanumanth Pothula (3):
  ethdev: support multiple mbuf pools per Rx queue
  net/cnxk: support mulitiple mbuf pools per Rx queue
  app/testpmd: support mulitiple mbuf pools per Rx queue

 app/test-pmd/testpmd.c                 |  34 ++++--
 app/test-pmd/testpmd.h                 |   3 +
 app/test-pmd/util.c                    |   4 +-
 doc/guides/rel_notes/release_22_11.rst |   6 +
 drivers/net/cnxk/cnxk_ethdev.c         |  84 +++++++++++--
 drivers/net/cnxk/cnxk_ethdev.h         |   2 +
 drivers/net/cnxk/cnxk_ethdev_ops.c     |   3 +
 lib/ethdev/rte_ethdev.c                | 157 +++++++++++++++++--------
 lib/ethdev/rte_ethdev.h                |  29 +++++
 9 files changed, 253 insertions(+), 69 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v8 1/4] ethdev: factor out helper function to check Rx mempool
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
@ 2022-10-07 17:29           ` Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue Andrew Rybchenko
                             ` (3 subsequent siblings)
  4 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:29 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit; +Cc: dev

Factor out the Rx mempool checks into a helper to avoid duplicated logic.

Signed-off-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
 lib/ethdev/rte_ethdev.c | 82 +++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 01fac713a2..b3dba291e7 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1648,6 +1648,36 @@ rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+static int
+rte_eth_check_rx_mempool(struct rte_mempool *mp, uint16_t offset,
+			 uint16_t min_length)
+{
+	uint16_t data_room_size;
+
+	/*
+	 * Check the size of the mbuf data buffer, this value
+	 * must be provided in the private data of the memory pool.
+	 * First check that the memory pool(s) has a valid private data.
+	 */
+	if (mp->private_data_size <
+			sizeof(struct rte_pktmbuf_pool_private)) {
+		RTE_ETHDEV_LOG(ERR, "%s private_data_size %u < %u\n",
+			mp->name, mp->private_data_size,
+			(unsigned int)
+			sizeof(struct rte_pktmbuf_pool_private));
+		return -ENOSPC;
+	}
+	data_room_size = rte_pktmbuf_data_room_size(mp);
+	if (data_room_size < offset + min_length) {
+		RTE_ETHDEV_LOG(ERR,
+			       "%s mbuf_data_room_size %u < %u (%u + %u)\n",
+			       mp->name, data_room_size,
+			       offset + min_length, offset, min_length);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
@@ -1657,6 +1687,7 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 	struct rte_mempool *mp_first;
 	uint32_t offset_mask;
 	uint16_t seg_idx;
+	int ret;
 
 	if (n_seg > seg_capa->max_nseg) {
 		RTE_ETHDEV_LOG(ERR,
@@ -1696,25 +1727,14 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 				return -EINVAL;
 			}
 		}
-		if (mpl->private_data_size <
-			sizeof(struct rte_pktmbuf_pool_private)) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s private_data_size %u < %u\n",
-				       mpl->name, mpl->private_data_size,
-				       (unsigned int)sizeof
-					(struct rte_pktmbuf_pool_private));
-			return -ENOSPC;
-		}
+
 		offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
 		*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
 		length = length != 0 ? length : *mbp_buf_size;
-		if (*mbp_buf_size < length + offset) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s mbuf_data_room_size %u < %u (segment length=%u + segment offset=%u)\n",
-				       mpl->name, *mbp_buf_size,
-				       length + offset, length, offset);
-			return -EINVAL;
-		}
+
+		ret = rte_eth_check_rx_mempool(mpl, offset, length);
+		if (ret != 0)
+			return ret;
 	}
 	return 0;
 }
@@ -1753,31 +1773,13 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 				       "Ambiguous segment configuration\n");
 			return -EINVAL;
 		}
-		/*
-		 * Check the size of the mbuf data buffer, this value
-		 * must be provided in the private data of the memory pool.
-		 * First check that the memory pool(s) has a valid private data.
-		 */
-		if (mp->private_data_size <
-				sizeof(struct rte_pktmbuf_pool_private)) {
-			RTE_ETHDEV_LOG(ERR, "%s private_data_size %u < %u\n",
-				mp->name, mp->private_data_size,
-				(unsigned int)
-				sizeof(struct rte_pktmbuf_pool_private));
-			return -ENOSPC;
-		}
+
+		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
+					       dev_info.min_rx_bufsize);
+		if (ret != 0)
+			return ret;
+
 		mbp_buf_size = rte_pktmbuf_data_room_size(mp);
-		if (mbp_buf_size < dev_info.min_rx_bufsize +
-				   RTE_PKTMBUF_HEADROOM) {
-			RTE_ETHDEV_LOG(ERR,
-				       "%s mbuf_data_room_size %u < %u (RTE_PKTMBUF_HEADROOM=%u + min_rx_bufsize(dev)=%u)\n",
-				       mp->name, mbp_buf_size,
-				       RTE_PKTMBUF_HEADROOM +
-				       dev_info.min_rx_bufsize,
-				       RTE_PKTMBUF_HEADROOM,
-				       dev_info.min_rx_bufsize);
-			return -EINVAL;
-		}
 	} else {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
-- 
2.30.2


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
@ 2022-10-07 17:29           ` Andrew Rybchenko
  2022-10-07 18:35             ` Thomas Monjalon
  2022-10-07 17:29           ` [PATCH v8 3/4] net/cnxk: support mulitiple " Andrew Rybchenko
                             ` (2 subsequent siblings)
  4 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:29 UTC (permalink / raw)
  To: Thomas Monjalon, Ferruh Yigit; +Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

Some of the HW has support for choosing memory pools based on the
packet's size.

This is often useful for saving the memory where the application
can create a different pool to steer the specific size of the
packet, thus enabling more efficient usage of memory.

For example, let's say HW has a capability of three pools,
 - pool-1 size is 2K
 - pool-2 size is > 2K and < 4K
 - pool-3 size is > 4K
Here,
        pool-1 can accommodate packets with sizes < 2K
        pool-2 can accommodate packets with sizes > 2K and < 4K
        pool-3 can accommodate packets with sizes > 4K

With multiple mempool capability enabled in SW, an application may
create three pools of different sizes and send them to PMD, allowing
PMD to program HW based on the packet lengths, so that packets with
less than 2K are received on pool-1, packets with lengths between 2K
and 4K are received on pool-2 and finally packets greater than 4K
are received on pool-3.
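
For illustration (not part of the patch), a sketch of how an application
could probe for the capability before building a pool array; the helper
name is hypothetical:

    #include <stdbool.h>
    #include <rte_ethdev.h>

    static bool
    rxq_supports_multi_mempool(uint16_t port_id)
    {
        struct rte_eth_dev_info dev_info;

        if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
            return false;

        /* A positive max_rx_mempools advertises the capability;
         * zero means only a single pool or buffer split is accepted. */
        return dev_info.max_rx_mempools > 0;
    }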

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
Signed-off-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
 doc/guides/rel_notes/release_22_11.rst |  6 ++
 lib/ethdev/rte_ethdev.c                | 81 ++++++++++++++++++++++----
 lib/ethdev/rte_ethdev.h                | 29 +++++++++
 3 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index e165c45367..df32a6a5e7 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -92,6 +92,12 @@ New Features
   ``rte_eth_cman_config_set()``, ``rte_eth_cman_info_get()``
   to support congestion management.
 
+* **Added support for multiple mbuf pools per ethdev Rx queue.**
+
+  The capability allows an application to provide many mempools of different
+  sizes and the PMD and/or NIC to choose a memory pool based on the packet's
+  length and/or Rx buffer availability.
+
 * **Updated Intel iavf driver.**
 
   * Added flow subscription support.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index b3dba291e7..979b02356e 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1739,6 +1739,41 @@ rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 	return 0;
 }
 
+static int
+rte_eth_rx_queue_check_mempools(struct rte_mempool **rx_mempools,
+			       uint16_t n_mempools, uint32_t *min_buf_size,
+			       const struct rte_eth_dev_info *dev_info)
+{
+	uint16_t pool_idx;
+	int ret;
+
+	if (n_mempools > dev_info->max_rx_mempools) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Too many Rx mempools %u vs maximum %u\n",
+			       n_mempools, dev_info->max_rx_mempools);
+		return -EINVAL;
+	}
+
+	for (pool_idx = 0; pool_idx < n_mempools; pool_idx++) {
+		struct rte_mempool *mp = rx_mempools[pool_idx];
+
+		if (mp == NULL) {
+			RTE_ETHDEV_LOG(ERR, "null Rx mempool pointer\n");
+			return -EINVAL;
+		}
+
+		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
+					       dev_info->min_rx_bufsize);
+		if (ret != 0)
+			return ret;
+
+		*min_buf_size = RTE_MIN(*min_buf_size,
+					rte_pktmbuf_data_room_size(mp));
+	}
+
+	return 0;
+}
+
 int
 rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		       uint16_t nb_rx_desc, unsigned int socket_id,
@@ -1746,7 +1781,8 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		       struct rte_mempool *mp)
 {
 	int ret;
-	uint32_t mbp_buf_size;
+	uint64_t rx_offloads;
+	uint32_t mbp_buf_size = UINT32_MAX;
 	struct rte_eth_dev *dev;
 	struct rte_eth_dev_info dev_info;
 	struct rte_eth_rxconf local_conf;
@@ -1766,35 +1802,42 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	if (ret != 0)
 		return ret;
 
+	rx_offloads = dev->data->dev_conf.rxmode.offloads;
+	if (rx_conf != NULL)
+		rx_offloads |= rx_conf->offloads;
+
+	/* Ensure that we have one and only one source of Rx buffers */
+	if ((mp != NULL) +
+	    (rx_conf != NULL && rx_conf->rx_nseg > 0) +
+	    (rx_conf != NULL && rx_conf->rx_nmempool > 0) != 1) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Ambiguous Rx mempools configuration\n");
+		return -EINVAL;
+	}
+
 	if (mp != NULL) {
 		/* Single pool configuration check. */
-		if (rx_conf != NULL && rx_conf->rx_nseg != 0) {
-			RTE_ETHDEV_LOG(ERR,
-				       "Ambiguous segment configuration\n");
-			return -EINVAL;
-		}
-
 		ret = rte_eth_check_rx_mempool(mp, RTE_PKTMBUF_HEADROOM,
 					       dev_info.min_rx_bufsize);
 		if (ret != 0)
 			return ret;
 
 		mbp_buf_size = rte_pktmbuf_data_room_size(mp);
-	} else {
+	} else if (rx_conf != NULL && rx_conf->rx_nseg > 0) {
 		const struct rte_eth_rxseg_split *rx_seg;
 		uint16_t n_seg;
 
 		/* Extended multi-segment configuration check. */
-		if (rx_conf == NULL || rx_conf->rx_seg == NULL || rx_conf->rx_nseg == 0) {
+		if (rx_conf->rx_seg == NULL) {
 			RTE_ETHDEV_LOG(ERR,
-				       "Memory pool is null and no extended configuration provided\n");
+				       "Memory pool is null and no multi-segment configuration provided\n");
 			return -EINVAL;
 		}
 
 		rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
 		n_seg = rx_conf->rx_nseg;
 
-		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		if (rx_offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
 			ret = rte_eth_rx_queue_check_split(rx_seg, n_seg,
 							   &mbp_buf_size,
 							   &dev_info);
@@ -1804,6 +1847,22 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 			RTE_ETHDEV_LOG(ERR, "No Rx segmentation offload configured\n");
 			return -EINVAL;
 		}
+	} else if (rx_conf != NULL && rx_conf->rx_nmempool > 0) {
+		/* Extended multi-pool configuration check. */
+		if (rx_conf->rx_mempools == NULL) {
+			RTE_ETHDEV_LOG(ERR, "Memory pools array is null\n");
+			return -EINVAL;
+		}
+
+		ret = rte_eth_rx_queue_check_mempools(rx_conf->rx_mempools,
+						     rx_conf->rx_nmempool,
+						     &mbp_buf_size,
+						     &dev_info);
+		if (ret != 0)
+			return ret;
+	} else {
+		RTE_ETHDEV_LOG(ERR, "Missing Rx mempool configuration\n");
+		return -EINVAL;
 	}
 
 	/* Use default specified by driver, if nb_rx_desc is zero */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 2530eda7c4..d1e44ffa5f 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1067,6 +1067,28 @@ struct rte_eth_rxconf {
 	 */
 	union rte_eth_rxseg *rx_seg;
 
+	/**
+	 * Array of mempools to allocate Rx buffers from.
+	 *
+	 * This provides support for multiple mbuf pools per Rx queue.
+	 * The capability is reported in device info via positive
+	 * max_rx_mempools.
+	 *
+	 * It could be useful for more efficient usage of memory when an
+	 * application creates different mempools to steer the specific
+	 * size of the packet.
+	 *
+	 * If many mempools are specified, packets received using Rx
+	 * burst may belong to any provided mempool. From the ethdev user's
+	 * point of view it is undefined how the PMD/NIC chooses a mempool
+	 * for a packet.
+	 *
+	 * If Rx scatter is enabled, a packet may be delivered using a chain
+	 * of mbufs obtained from single mempool or multiple mempools based
+	 * on the NIC implementation.
+	 */
+	struct rte_mempool **rx_mempools;
+	uint16_t rx_nmempool; /** < Number of Rx mempools */
+
 	uint64_t reserved_64s[2]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
@@ -1614,6 +1636,13 @@ struct rte_eth_dev_info {
 	/** Configured number of Rx/Tx queues */
 	uint16_t nb_rx_queues; /**< Number of Rx queues. */
 	uint16_t nb_tx_queues; /**< Number of Tx queues. */
+	/**
+	 * Maximum number of Rx mempools supported per Rx queue.
+	 *
+	 * Value greater than 0 means that the driver supports Rx queue
+	 * mempools specification via rx_conf->rx_mempools.
+	 */
+	uint16_t max_rx_mempools;
 	/** Rx parameter recommendations */
 	struct rte_eth_dev_portconf default_rxportconf;
 	/** Tx parameter recommendations */
-- 
2.30.2
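
For illustration, a minimal application-side sketch of the new fields (not
part of the patch; the helper name, pool names, sizes and descriptor count
are hypothetical, and the port is assumed to be otherwise configured):

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

static int
setup_multi_mempool_rxq(uint16_t port_id)	/* hypothetical helper */
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rxconf;
	struct rte_mempool *pools[2];
	int ret;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		return ret;
	/* A positive max_rx_mempools advertises the capability. */
	if (dev_info.max_rx_mempools < 2)
		return -ENOTSUP;

	pools[0] = rte_pktmbuf_pool_create("rx_small", 4096, 256, 0,
					   RTE_PKTMBUF_HEADROOM + 2048,
					   rte_socket_id());
	pools[1] = rte_pktmbuf_pool_create("rx_large", 1024, 256, 0,
					   RTE_PKTMBUF_HEADROOM + 9216,
					   rte_socket_id());
	if (pools[0] == NULL || pools[1] == NULL)
		return -ENOMEM;

	rxconf = dev_info.default_rxconf;
	rxconf.rx_mempools = pools;
	rxconf.rx_nmempool = 2;
	/* The mp argument must be NULL here: exactly one source of Rx
	 * buffers (mp, rx_seg or rx_mempools) may be supplied.
	 */
	return rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
				      &rxconf, NULL);
}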


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v8 3/4] net/cnxk: support mulitiple mbuf pools per Rx queue
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-10-07 17:29           ` Andrew Rybchenko
  2022-10-07 17:29           ` [PATCH v8 4/4] app/testpmd: " Andrew Rybchenko
  2022-10-08 20:38           ` [PATCH v8 0/4] ethdev: support mulitiple " Thomas Monjalon
  4 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:29 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
  Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

Presently, the HW is programmed to receive packets only from the LPB pool,
so all packets land in the LPB pool.

However, CNXK HW supports two pools:
 - SPB -> smaller packets (less than 4K)
 - LPB -> bigger packets (greater than 4K)

This patch enables the multiple mempool capability: the pool is selected
based on the packet's length, i.e. the PMD programs the HW to receive
packets into either the SPB or the LPB pool depending on the packet's
length.

This is achieved through the multiple mempool capability, which allows
the application to pass more than one pool (in our case two) to the
driver, with different segment (packet) lengths, helping the driver
configure both pools based on the segment lengths.

This is often useful for saving memory: the application can create
separate pools sized for specific packet lengths, enabling more
effective use of memory.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 drivers/net/cnxk/cnxk_ethdev.c     | 84 ++++++++++++++++++++++++++----
 drivers/net/cnxk/cnxk_ethdev.h     |  2 +
 drivers/net/cnxk/cnxk_ethdev_ops.c |  3 ++
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 2cb48ba152..bb27cc87fd 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -541,6 +541,58 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)
 	plt_free(txq_sp);
 }
 
+static int
+cnxk_nix_process_rx_conf(const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool **lpb_pool,
+			 struct rte_mempool **spb_pool)
+{
+	struct rte_mempool *pool0;
+	struct rte_mempool *pool1;
+	struct rte_mempool **mp = rx_conf->rx_mempools;
+	const char *platform_ops;
+	struct rte_mempool_ops *ops;
+
+	if (*lpb_pool ||
+	    rx_conf->rx_nmempool != CNXK_NIX_NUM_POOLS_MAX) {
+		plt_err("invalid arguments");
+		return -EINVAL;
+	}
+
+	if (mp == NULL || mp[0] == NULL || mp[1] == NULL) {
+		plt_err("invalid memory pools\n");
+		return -EINVAL;
+	}
+
+	pool0 = mp[0];
+	pool1 = mp[1];
+
+	if (pool0->elt_size > pool1->elt_size) {
+		*lpb_pool = pool0;
+		*spb_pool = pool1;
+
+	} else {
+		*lpb_pool = pool1;
+		*spb_pool = pool0;
+	}
+
+	if ((*spb_pool)->pool_id == 0) {
+		plt_err("Invalid pool_id");
+		return -EINVAL;
+	}
+
+	platform_ops = rte_mbuf_platform_mempool_ops();
+	ops = rte_mempool_get_ops((*spb_pool)->ops_index);
+	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
+		plt_err("mempool ops should be of cnxk_npa type");
+		return -EINVAL;
+	}
+
+	plt_info("spb_pool:%s lpb_pool:%s lpb_len:%u spb_len:%u\n", (*spb_pool)->name,
+		 (*lpb_pool)->name, (*lpb_pool)->elt_size, (*spb_pool)->elt_size);
+
+	return 0;
+}
+
 int
 cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			uint32_t nb_desc, uint16_t fp_rx_q_sz,
@@ -557,6 +609,8 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	uint16_t first_skip;
 	int rc = -EINVAL;
 	size_t rxq_sz;
+	struct rte_mempool *lpb_pool = mp;
+	struct rte_mempool *spb_pool = NULL;
 
 	/* Sanity checks */
 	if (rx_conf->rx_deferred_start == 1) {
@@ -564,15 +618,21 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 		goto fail;
 	}
 
+	if (rx_conf->rx_nmempool > 0) {
+		rc = cnxk_nix_process_rx_conf(rx_conf, &lpb_pool, &spb_pool);
+		if (rc)
+			goto fail;
+	}
+
 	platform_ops = rte_mbuf_platform_mempool_ops();
 	/* This driver needs cnxk_npa mempool ops to work */
-	ops = rte_mempool_get_ops(mp->ops_index);
+	ops = rte_mempool_get_ops(lpb_pool->ops_index);
 	if (strncmp(ops->name, platform_ops, RTE_MEMPOOL_OPS_NAMESIZE)) {
 		plt_err("mempool ops should be of cnxk_npa type");
 		goto fail;
 	}
 
-	if (mp->pool_id == 0) {
+	if (lpb_pool->pool_id == 0) {
 		plt_err("Invalid pool_id");
 		goto fail;
 	}
@@ -589,13 +649,13 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Its a no-op when inline device is not used */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY ||
 	    dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
-		roc_nix_inl_dev_xaq_realloc(mp->pool_id);
+		roc_nix_inl_dev_xaq_realloc(lpb_pool->pool_id);
 
 	/* Increase CQ size to Aura size to avoid CQ overflow and
 	 * then CPT buffer leak.
 	 */
 	if (dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY)
-		nb_desc = nix_inl_cq_sz_clamp_up(nix, mp, nb_desc);
+		nb_desc = nix_inl_cq_sz_clamp_up(nix, lpb_pool, nb_desc);
 
 	/* Setup ROC CQ */
 	cq = &dev->cqs[qid];
@@ -611,17 +671,17 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	rq = &dev->rqs[qid];
 	rq->qid = qid;
 	rq->cqid = cq->qid;
-	rq->aura_handle = mp->pool_id;
+	rq->aura_handle = lpb_pool->pool_id;
 	rq->flow_tag_width = 32;
 	rq->sso_ena = false;
 
 	/* Calculate first mbuf skip */
 	first_skip = (sizeof(struct rte_mbuf));
 	first_skip += RTE_PKTMBUF_HEADROOM;
-	first_skip += rte_pktmbuf_priv_size(mp);
+	first_skip += rte_pktmbuf_priv_size(lpb_pool);
 	rq->first_skip = first_skip;
 	rq->later_skip = sizeof(struct rte_mbuf);
-	rq->lpb_size = mp->elt_size;
+	rq->lpb_size = lpb_pool->elt_size;
 	if (roc_errata_nix_no_meta_aura())
 		rq->lpb_drop_ena = !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SECURITY);
 
@@ -629,6 +689,12 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	if (roc_nix_inl_inb_is_enabled(nix))
 		rq->ipsech_ena = true;
 
+	if (spb_pool) {
+		rq->spb_ena = 1;
+		rq->spb_aura_handle = spb_pool->pool_id;
+		rq->spb_size = spb_pool->elt_size;
+	}
+
 	rc = roc_nix_rq_init(&dev->nix, rq, !!eth_dev->data->dev_started);
 	if (rc) {
 		plt_err("Failed to init roc rq for rq=%d, rc=%d", qid, rc);
@@ -651,7 +717,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 	/* Queue config should reflect global offloads */
 	rxq_sp->qconf.conf.rx.offloads = dev->rx_offloads;
 	rxq_sp->qconf.nb_desc = nb_desc;
-	rxq_sp->qconf.mp = mp;
+	rxq_sp->qconf.mp = lpb_pool;
 	rxq_sp->tc = 0;
 	rxq_sp->tx_pause = (dev->fc_cfg.mode == RTE_ETH_FC_FULL ||
 			    dev->fc_cfg.mode == RTE_ETH_FC_TX_PAUSE);
@@ -670,7 +736,7 @@ cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			goto free_mem;
 	}
 
-	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, mp->name, nb_desc,
+	plt_nix_dbg("rq=%d pool=%s nb_desc=%d->%d", qid, lpb_pool->name, nb_desc,
 		    cq->nb_desc);
 
 	/* Store start of fast path area */
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 5204c46244..d282f79a9a 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -44,6 +44,8 @@
 #define CNXK_NIX_RX_DEFAULT_RING_SZ 4096
 /* Max supported SQB count */
 #define CNXK_NIX_TX_MAX_SQB 512
+/* LPB & SPB */
+#define CNXK_NIX_NUM_POOLS_MAX 2
 
 /* If PTP is enabled additional SEND MEM DESC is required which
  * takes 2 words, hence max 7 iova address are possible
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 30d169f799..8f7287161b 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -69,6 +69,9 @@ cnxk_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo)
 	devinfo->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP |
 			    RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+	devinfo->max_rx_mempools = CNXK_NIX_NUM_POOLS_MAX;
+
 	return 0;
 }
 
-- 
2.30.2
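
A condensed usage sketch for this driver (pool names and sizes are
hypothetical; rxconf is assumed to be prepared as in the earlier ethdev
sketch). The application passes exactly two pools; cnxk_nix_process_rx_conf()
above classifies them by element size, taking the pool with the larger
elt_size as LPB and the other as SPB. Both pools must use the cnxk_npa
(platform) mempool ops, which rte_pktmbuf_pool_create() normally selects
on this platform:

	struct rte_mempool *pools[2];

	pools[0] = rte_pktmbuf_pool_create("spb", 8192, 256, 0,
					   RTE_PKTMBUF_HEADROOM + 2048,
					   rte_socket_id()); /* smaller elt_size -> SPB */
	pools[1] = rte_pktmbuf_pool_create("lpb", 2048, 256, 0,
					   RTE_PKTMBUF_HEADROOM + 9216,
					   rte_socket_id()); /* larger elt_size -> LPB */

	rxconf.rx_mempools = pools;
	rxconf.rx_nmempool = 2;	/* CNXK_NIX_NUM_POOLS_MAX */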


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v8 4/4] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
                             ` (2 preceding siblings ...)
  2022-10-07 17:29           ` [PATCH v8 3/4] net/cnxk: support mulitiple " Andrew Rybchenko
@ 2022-10-07 17:29           ` Andrew Rybchenko
       [not found]             ` <PH0PR18MB47500560DC1793F68E7312DDCB5F9@PH0PR18MB4750.namprd18.prod.outlook.com>
  2022-10-17  8:48             ` [PATCH v9 1/1] " Hanumanth Pothula
  2022-10-08 20:38           ` [PATCH v8 0/4] ethdev: support mulitiple " Thomas Monjalon
  4 siblings, 2 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:29 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang; +Cc: dev, Hanumanth Pothula

From: Hanumanth Pothula <hpothula@marvell.com>

Some HW supports choosing a memory pool based on the packet's size. The
multiple mempool capability allows the PMD/NIC to choose a memory pool
based on the packet's length.

When multiple mempool support is enabled, populate the mempool array and
also print the name of the pool on which each packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 34 ++++++++++++++++++++++++----------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index de6ad00138..2ce9953c76 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2624,6 +2624,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
@@ -2645,16 +2646,29 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->length = rx_pkt_seg_lengths[i] ?
-				   rx_pkt_seg_lengths[i] :
-				   mbuf_data_size[mp_n];
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/**
+			 * On segment length zero, set the length to
+			 *      buffer size - headroom size
+			 * to make sure enough space is accommodated for the header.
+			 */
+			rx_seg->length = rx_pkt_seg_lengths[i] ?
+					 rx_pkt_seg_lengths[i] :
+					 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					 rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+		} else {
+			rx_mempool[i] = mpx ? mpx : mp;
+		}
+	}
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index cbbc7cc350..2f50a10d1f 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.30.2
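
With this change, a verbose-mode packet dump line gains a pool=... field.
Following the format string in util.c, a dumped packet might look like
the following (all values are hypothetical, including the mb_pool naming):

  src=3C:FD:FE:9E:7F:71 - dst=02:00:00:00:00:01 - pool=mb_pool_0 - type=0x0800 - length=64 - nb_segs=1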


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 16:08             ` Thomas Monjalon
  2022-10-07 16:18               ` Stephen Hemminger
@ 2022-10-07 17:30               ` Andrew Rybchenko
  1 sibling, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 17:30 UTC (permalink / raw)
  To: Thomas Monjalon, Hanumanth Pothula; +Cc: Ferruh Yigit, dev

On 10/7/22 19:08, Thomas Monjalon wrote:
> 07/10/2022 16:37, Andrew Rybchenko:
>> @@ -1067,6 +1067,24 @@ struct rte_eth_rxconf {
>>   	 */
>>   	union rte_eth_rxseg *rx_seg;
>>   
>> +	/**
>> +	 * Array of mempools to allocate Rx buffers from.
>> +	 *
>> +	 * This provides support for multiple mbuf pools per Rx queue.
>> +	 * The capability is reported in device info via positive
>> +	 * max_rx_mempools.
>> +	 *
>> +	 * It could be useful for more efficient usage of memory when an
>> +	 * application creates different mempools to steer the specific
>> +	 * size of the packet.
>> +	 *
>> +	 * Note that if Rx scatter is enabled, a packet may be delivered using
>> +	 * a chain of mbufs obtained from single mempool or multiple mempools
>> +	 * based on the NIC implementation.
>> +	 */
>> +	struct rte_mempool **rx_mempools;
>> +	uint16_t rx_nmempool; /** < Number of Rx mempools */
> 
> The commit message suggests a configuration per packet size.
> I guess it is not configurable in ethdev API?
> If it is hard-configured in the HW or the driver only,
> it should be specified here.

See v8

> 
> [...]
>> +	/**
>> +	 * Maximum number of Rx mempools supported per Rx queue.
>> +	 *
>> +	 * Value greater than 0 means that the driver supports Rx queue
>> +	 * mempools specification via rx_conf->rx_mempools.
>> +	 */
>> +	uint16_t max_rx_mempools;
> 
> 
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue
  2022-10-07 17:29           ` [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue Andrew Rybchenko
@ 2022-10-07 18:35             ` Thomas Monjalon
  2022-10-07 19:45               ` Andrew Rybchenko
  0 siblings, 1 reply; 75+ messages in thread
From: Thomas Monjalon @ 2022-10-07 18:35 UTC (permalink / raw)
  To: Andrew Rybchenko; +Cc: Ferruh Yigit, dev, Hanumanth Pothula

07/10/2022 19:29, Andrew Rybchenko:
> +* **Added support for mulitiple mbuf pools per ethdev Rx queue.**

mulitiple -> multiple

I can fix when merging.

> +
> +  The capability allows application to provide many mempools of different
> +  size and PMD and/or NIC to choose a memory pool based on the packet's
> +  length and/or Rx buffers availability.
[...] 
> +	/**
> +	 * Array of mempools to allocate Rx buffers from.
> +	 *
> +	 * This provides support for multiple mbuf pools per Rx queue.
> +	 * The capability is reported in device info via positive
> +	 * max_rx_mempools.
> +	 *
> +	 * It could be useful for more efficient usage of memory when an
> +	 * application creates different mempools to steer the specific
> +	 * size of the packet.
> +	 *
> +	 * If many mempools are specified, packets received using Rx
> +	 * burst may belong to any provided mempool. From ethdev user point
> +	 * of view it is undefined how PMD/NIC chooses mempool for a packet.
> +	 *
> +	 * If Rx scatter is enabled, a packet may be delivered using a chain
> +	 * of mbufs obtained from single mempool or multiple mempools based
> +	 * on the NIC implementation.
> +	 */
> +	struct rte_mempool **rx_mempools;
> +	uint16_t rx_nmempool; /** < Number of Rx mempools */

OK, it's clear, thanks.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [EXT] [PATCH v8 4/4] app/testpmd: support mulitiple mbuf pools per Rx queue
       [not found]             ` <PH0PR18MB47500560DC1793F68E7312DDCB5F9@PH0PR18MB4750.namprd18.prod.outlook.com>
@ 2022-10-07 19:43               ` Andrew Rybchenko
  2022-10-07 19:56                 ` Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 19:43 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula; +Cc: dev, Thomas Monjalon, Ferruh Yigit

On 10/7/22 21:16, Hanumanth Reddy Pothula wrote:
> Thanks Andrew for helping me in merging the changes.
> 
> In the below if condition, the check (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0 will always be true, as we are not setting RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT.
> I think we need to use the && operation instead.
> 
> if (rx_pkt_nb_segs <= 1 ||
>       (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
>           rx_conf->rx_seg = NULL;
>           rx_conf->rx_nseg = 0;
>           ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
>                                        nb_rx_desc, socket_id,
>                                        rx_conf, mp);
>           return ret;
>   }

That's exactly what I'm talking about in the cover letter.
I'm not sure about the testpmd patch at all. So, I vote for postponing
the testpmd part to the rc2 stage.

> 
> I applied the changes locally with the && operation in the above if condition and confirmed it's working fine.
> 
> Regards,
> Hanumanth
> 
>> -----Original Message-----
>> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> Sent: Friday, October 7, 2022 10:59 PM
>> To: Aman Singh <aman.deep.singh@intel.com>; Yuying Zhang
>> <yuying.zhang@intel.com>
>> Cc: dev@dpdk.org; Hanumanth Reddy Pothula <hpothula@marvell.com>
>> Subject: [EXT] [PATCH v8 4/4] app/testpmd: support mulitiple mbuf pools per Rx
>> queue
>>
>> External Email
>>
>> ----------------------------------------------------------------------
>> From: Hanumanth Pothula <hpothula@marvell.com>
>>
>> Some of the HW has support for choosing memory pools based on the packet's
>> size. The pool sort capability allows PMD/NIC to choose a memory pool based
>> on the packet's length.
>>
>> On multiple mempool support enabled, populate mempool array and also print
>> pool name on which packet is received.
>>
>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>> ---
>>   app/test-pmd/testpmd.c | 34 ++++++++++++++++++++++++----------
>>   app/test-pmd/testpmd.h |  3 +++
>>   app/test-pmd/util.c    |  4 ++--
>>   3 files changed, 29 insertions(+), 12 deletions(-)
>>
>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
>> index de6ad00138..2ce9953c76 100644
>> --- a/app/test-pmd/testpmd.c
>> +++ b/app/test-pmd/testpmd.c
>> @@ -2624,6 +2624,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
>>  {
>>   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
>> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
>>   	unsigned int i, mp_n;
>>   	int ret;
>>
>> @@ -2645,16 +2646,29 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>   		 */
>>   		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
>>   		mpx = mbuf_pool_find(socket_id, mp_n);
>> -		/* Handle zero as mbuf data buffer size. */
>> -		rx_seg->length = rx_pkt_seg_lengths[i] ?
>> -				   rx_pkt_seg_lengths[i] :
>> -				   mbuf_data_size[mp_n];
>> -		rx_seg->offset = i < rx_pkt_nb_offs ?
>> -				   rx_pkt_seg_offsets[i] : 0;
>> -		rx_seg->mp = mpx ? mpx : mp;
>> -	}
>> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
>> -	rx_conf->rx_seg = rx_useg;
>> +		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>> +			/**
>> +			 * On segment length zero, set the length to
>> +			 *      buffer size - headroom size
>> +			 * to make sure enough space is accommodated for the header.
>> +			 */
>> +			rx_seg->length = rx_pkt_seg_lengths[i] ?
>> +					 rx_pkt_seg_lengths[i] :
>> +					 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
>> +			rx_seg->offset = i < rx_pkt_nb_offs ?
>> +					 rx_pkt_seg_offsets[i] : 0;
>> +			rx_seg->mp = mpx ? mpx : mp;
>> +		} else {
>> +			rx_mempool[i] = mpx ? mpx : mp;
>> +		}
>> +	}
>> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
>> +		rx_conf->rx_seg = rx_useg;
>> +	} else {
>> +		rx_conf->rx_mempools = rx_mempool;
>> +		rx_conf->rx_nmempool = rx_pkt_nb_segs;
>> +	}
>>   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
>>   				    socket_id, rx_conf, NULL);
>>   	rx_conf->rx_seg = NULL;
>> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
>> index cbbc7cc350..2f50a10d1f 100644
>> --- a/app/test-pmd/testpmd.h
>> +++ b/app/test-pmd/testpmd.h
>> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
>>
>>   #define MIN_TOTAL_NUM_MBUFS 1024
>>
>> +/* Maximum number of pools supported per Rx queue */
>> +#define MAX_MEMPOOL 8
>> +
>>   typedef uint8_t  lcoreid_t;
>>   typedef uint16_t portid_t;
>>   typedef uint16_t queueid_t;
>> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
>> index fd98e8b51d..f9df5f69ef 100644
>> --- a/app/test-pmd/util.c
>> +++ b/app/test-pmd/util.c
>> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
>>   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
>>   				 print_buf, buf_size, &cur_len);
>>   		MKDUMPSTR(print_buf, buf_size, cur_len,
>> -			  " - type=0x%04x - length=%u - nb_segs=%d",
>> -			  eth_type, (unsigned int) mb->pkt_len,
>> +			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
>> +			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
>>   			  (int)mb->nb_segs);
>>   		ol_flags = mb->ol_flags;
>>   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
>> --
>> 2.30.2
> 
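
For reference, the corrected guard as it later appears in v9 (&& instead
of ||), so that the multi-mempool path is reachable when buffer split is
not requested:

	if (rx_pkt_nb_segs <= 1 &&
	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
		/* single pool, single segment: plain queue setup */
		...
	}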


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue
  2022-10-07 18:35             ` Thomas Monjalon
@ 2022-10-07 19:45               ` Andrew Rybchenko
  0 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-10-07 19:45 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Ferruh Yigit, dev, Hanumanth Pothula

On 10/7/22 21:35, Thomas Monjalon wrote:
> 07/10/2022 19:29, Andrew Rybchenko:
>> +* **Added support for mulitiple mbuf pools per ethdev Rx queue.**
> 
> mulitiple -> multiple
> 
> I can fix when merging.

Thanks, I've delegated the first 3 patches to you. I think it is ready to
go. Let's sort out the testpmd patch a bit later. Maybe we'll have a
testpmd maintainers' review.

> 
>> +
>> +  The capability allows application to provide many mempools of different
>> +  size and PMD and/or NIC to choose a memory pool based on the packet's
>> +  length and/or Rx buffers availability.
> [...]
>> +	/**
>> +	 * Array of mempools to allocate Rx buffers from.
>> +	 *
>> +	 * This provides support for multiple mbuf pools per Rx queue.
>> +	 * The capability is reported in device info via positive
>> +	 * max_rx_mempools.
>> +	 *
>> +	 * It could be useful for more efficient usage of memory when an
>> +	 * application creates different mempools to steer the specific
>> +	 * size of the packet.
>> +	 *
>> +	 * If many mempools are specified, packets received using Rx
>> +	 * burst may belong to any provided mempool. From ethdev user point
>> +	 * of view it is undefined how PMD/NIC chooses mempool for a packet.
>> +	 *
>> +	 * If Rx scatter is enabled, a packet may be delivered using a chain
>> +	 * of mbufs obtained from single mempool or multiple mempools based
>> +	 * on the NIC implementation.
>> +	 */
>> +	struct rte_mempool **rx_mempools;
>> +	uint16_t rx_nmempool; /** < Number of Rx mempools */
> 
> OK, it's clear, thanks.
> 
> 


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] [PATCH v8 4/4] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-07 19:43               ` [EXT] " Andrew Rybchenko
@ 2022-10-07 19:56                 ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-10-07 19:56 UTC (permalink / raw)
  To: Andrew Rybchenko; +Cc: dev, Thomas Monjalon, Ferruh Yigit



> -----Original Message-----
> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Sent: Saturday, October 8, 2022 1:13 AM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>
> Subject: Re: [EXT] [PATCH v8 4/4] app/testpmd: support mulitiple mbuf pools per
> Rx queue
> 
> On 10/7/22 21:16, Hanumanth Reddy Pothula wrote:
> > Thanks Andrew for helping me in merging the changes.
> >
> > In the below if condition, the check (rx_conf->offloads &
> > RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0 will always be true, as we are
> > not setting RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT.
> > I think we need to use the && operation instead.
> >
> > if (rx_pkt_nb_segs <= 1 ||
> >       (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
> >           rx_conf->rx_seg = NULL;
> >           rx_conf->rx_nseg = 0;
> >           ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
> >                                        nb_rx_desc, socket_id,
> >                                        rx_conf, mp);
> >           return ret;
> >   }
> 
> That's exactly what I'm talking about in the cover letter.
> I'm not sure about the testpmd patch at all. So, I vote for postponing the
> testpmd part to the rc2 stage.
Okay, as you mentioned, let's have the testpmd maintainers review it.
Thanks, Andrew.
> 
> >
> > Applied changes locally with && operation in above if condition and confirmed
> its working fine.
> >
> > Regards,
> > Hanumanth
> >
> >> -----Original Message-----
> >> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >> Sent: Friday, October 7, 2022 10:59 PM
> >> To: Aman Singh <aman.deep.singh@intel.com>; Yuying Zhang
> >> <yuying.zhang@intel.com>
> >> Cc: dev@dpdk.org; Hanumanth Reddy Pothula <hpothula@marvell.com>
> >> Subject: [EXT] [PATCH v8 4/4] app/testpmd: support mulitiple mbuf
> >> pools per Rx queue
> >>
> >> External Email
> >>
> >> ---------------------------------------------------------------------
> >> -
> >> From: Hanumanth Pothula <hpothula@marvell.com>
> >>
> >> Some of the HW has support for choosing memory pools based on the
> >> packet's size. The pool sort capability allows PMD/NIC to choose a
> >> memory pool based on the packet's length.
> >>
> >> On multiple mempool support enabled, populate mempool array and also
> >> print pool name on which packet is received.
> >>
> >> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >> ---
> >>   app/test-pmd/testpmd.c | 34 ++++++++++++++++++++++++----------
> >>   app/test-pmd/testpmd.h |  3 +++
> >>   app/test-pmd/util.c    |  4 ++--
> >>   3 files changed, 29 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> >> index de6ad00138..2ce9953c76 100644
> >> --- a/app/test-pmd/testpmd.c
> >> +++ b/app/test-pmd/testpmd.c
> >> @@ -2624,6 +2624,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >>   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
> >>  {
> >>   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> >> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> >>   	unsigned int i, mp_n;
> >>   	int ret;
> >>
> >> @@ -2645,16 +2646,29 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >>   		 */
> >>   		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> >>   		mpx = mbuf_pool_find(socket_id, mp_n);
> >> -		/* Handle zero as mbuf data buffer size. */
> >> -		rx_seg->length = rx_pkt_seg_lengths[i] ?
> >> -				   rx_pkt_seg_lengths[i] :
> >> -				   mbuf_data_size[mp_n];
> >> -		rx_seg->offset = i < rx_pkt_nb_offs ?
> >> -				   rx_pkt_seg_offsets[i] : 0;
> >> -		rx_seg->mp = mpx ? mpx : mp;
> >> -	}
> >> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> >> -	rx_conf->rx_seg = rx_useg;
> >> +		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> >> +			/**
> >> +			 * On segment length zero, set the length to
> >> +			 *      buffer size - headroom size
> >> +			 * to make sure enough space is accommodated for the header.
> >> +			 */
> >> +			rx_seg->length = rx_pkt_seg_lengths[i] ?
> >> +					 rx_pkt_seg_lengths[i] :
> >> +					 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
> >> +			rx_seg->offset = i < rx_pkt_nb_offs ?
> >> +					 rx_pkt_seg_offsets[i] : 0;
> >> +			rx_seg->mp = mpx ? mpx : mp;
> >> +		} else {
> >> +			rx_mempool[i] = mpx ? mpx : mp;
> >> +		}
> >> +	}
> >> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> >> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> >> +		rx_conf->rx_seg = rx_useg;
> >> +	} else {
> >> +		rx_conf->rx_mempools = rx_mempool;
> >> +		rx_conf->rx_nmempool = rx_pkt_nb_segs;
> >> +	}
> >>   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
> >>   				    socket_id, rx_conf, NULL);
> >>   	rx_conf->rx_seg = NULL;
> >> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> >> index cbbc7cc350..2f50a10d1f 100644
> >> --- a/app/test-pmd/testpmd.h
> >> +++ b/app/test-pmd/testpmd.h
> >> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
> >>
> >>   #define MIN_TOTAL_NUM_MBUFS 1024
> >>
> >> +/* Maximum number of pools supported per Rx queue */
> >> +#define MAX_MEMPOOL 8
> >> +
> >>   typedef uint8_t  lcoreid_t;
> >>   typedef uint16_t portid_t;
> >>   typedef uint16_t queueid_t;
> >> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
> >> index fd98e8b51d..f9df5f69ef 100644
> >> --- a/app/test-pmd/util.c
> >> +++ b/app/test-pmd/util.c
> >> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
> >>   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
> >>   				 print_buf, buf_size, &cur_len);
> >>   		MKDUMPSTR(print_buf, buf_size, cur_len,
> >> -			  " - type=0x%04x - length=%u - nb_segs=%d",
> >> -			  eth_type, (unsigned int) mb->pkt_len,
> >> +			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
> >> +			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
> >>   			  (int)mb->nb_segs);
> >>   		ol_flags = mb->ol_flags;
> >>   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
> >> --
> >> 2.30.2
> >


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v8 0/4] ethdev: support mulitiple mbuf pools per Rx queue
  2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
                             ` (3 preceding siblings ...)
  2022-10-07 17:29           ` [PATCH v8 4/4] app/testpmd: " Andrew Rybchenko
@ 2022-10-08 20:38           ` Thomas Monjalon
  4 siblings, 0 replies; 75+ messages in thread
From: Thomas Monjalon @ 2022-10-08 20:38 UTC (permalink / raw)
  To: Hanumanth Pothula, Andrew Rybchenko; +Cc: Ferruh Yigit, dev

07/10/2022 19:29, Andrew Rybchenko:
> I'm not sure in testpmd patch. Review would be useful and may be we
> should postpone it to rc2.
> 
> v8:
>  - Process review notes
> v7:
>  - Drop RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL offload which seems to be
>    unnecessary. Positive max_rx_mempools in dev_info is sufficient to
>    indicate that the capability is support and positive number of
>    mempools in Rx configuration is sufficient to request it.
>  - Add helper patch to factor out Rx mempool check to be shared
>    for single mempool, buffer split and multiple mempools case.
>  - Refine check for a way to provide Rx buffers to be one and only one.
>    Either single mempool, or buffer split, or multi mempool.
>  - Drop feature advertisement in net/cnxk patch since there is no
>    such feature defined yet. I have no strong opinion if a new feature
>    is required or not.
> v6:
>  - Updated release notes, release_22_11.rst.
> v5:
>  - Declared memory pools as struct rte_mempool **rx_mempools rather than
>    as struct rte_mempool *mp.
>  - Added the feature in release notes.
>  - Updated conditions and strings as per review comments.
> v4:
>  - Renamed Offload capability name from RTE_ETH_RX_OFFLOAD_BUFFER_SORT
>    to RTE_ETH_RX_OFFLOAD_MUL_MEMPOOL.
>  - In struct rte_eth_rxconf, defined new pointer, which holds array of
>    type struct rte_eth_rx_mempool(memory pools). This array is used
>    by PMD to program multiple mempools.
> v3:
>  - Implemented Pool Sort capability as new Rx offload capability,
>    RTE_ETH_RX_OFFLOAD_BUFFER_SORT.
> v2:
>  - Along with spec changes, uploading testpmd and driver changes.
> 
> Andrew Rybchenko (1):
>   ethdev: factor out helper function to check Rx mempool
> 
> Hanumanth Pothula (3):
>   ethdev: support multiple mbuf pools per Rx queue
>   net/cnxk: support mulitiple mbuf pools per Rx queue
>   app/testpmd: support mulitiple mbuf pools per Rx queue

Applied, except testpmd patch, as recommended by Andrew, thanks.




^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v9 1/1] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-07 17:29           ` [PATCH v8 4/4] app/testpmd: " Andrew Rybchenko
       [not found]             ` <PH0PR18MB47500560DC1793F68E7312DDCB5F9@PH0PR18MB4750.namprd18.prod.outlook.com>
@ 2022-10-17  8:48             ` Hanumanth Pothula
  2022-10-21 15:57               ` Singh, Aman Deep
  2022-10-24  4:07               ` [PATCH v10 1/1] app/testpmd: support multiple " Hanumanth Pothula
  1 sibling, 2 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-17  8:48 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The multiple mempool capability allows the PMD/NIC to
choose a memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
---
 app/test-pmd/testpmd.c | 40 ++++++++++++++++++++++++++++------------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 ++--
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 5b0f0838dc..1549551640 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2647,10 +2647,16 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
+	/* For multiple mempools per Rx queue support,
+	 * rx_pkt_nb_segs greater than 1 and
+	 * Rx offload flag, RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT won't be set.
+	 * @see rte_eth_rxconf::rx_mempools
+	 */
+	if (rx_pkt_nb_segs <= 1 &&
 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
@@ -2668,20 +2674,30 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		 */
 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
 		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
+
+		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+			/* Handle zero as mbuf data buffer size. */
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
+				rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
+			} else {
+				rx_seg->length = rx_pkt_seg_lengths[i] ?
+						 rx_pkt_seg_lengths[i] :
+						 mbuf_data_size[mp_n];
+			}
 		} else {
-			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+			rx_mempool[i] = mpx ? mpx : mp;
 		}
 	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = rx_pkt_nb_segs;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e65be323b8..14be10dcef 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v9 1/1] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-17  8:48             ` [PATCH v9 1/1] " Hanumanth Pothula
@ 2022-10-21 15:57               ` Singh, Aman Deep
  2022-10-24  3:32                 ` [EXT] " Hanumanth Reddy Pothula
  2022-10-24  4:07               ` [PATCH v10 1/1] app/testpmd: support multiple " Hanumanth Pothula
  1 sibling, 1 reply; 75+ messages in thread
From: Singh, Aman Deep @ 2022-10-21 15:57 UTC (permalink / raw)
  To: Hanumanth Pothula, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram



On 10/17/2022 2:18 PM, Hanumanth Pothula wrote:
> Some of the HW has support for choosing memory pools based on
> the packet's size. The pool sort capability allows PMD/NIC to
> choose a memory pool based on the packet's length.
>
> On multiple mempool support enabled, populate mempool array
> accordingly. Also, print pool name on which packet is received.
>
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> ---
>   app/test-pmd/testpmd.c | 40 ++++++++++++++++++++++++++++------------
>   app/test-pmd/testpmd.h |  3 +++
>   app/test-pmd/util.c    |  4 ++--
>   3 files changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 5b0f0838dc..1549551640 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2647,10 +2647,16 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
>   {
>   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
>   	unsigned int i, mp_n;
>   	int ret;
>   
> -	if (rx_pkt_nb_segs <= 1 ||
> +	/* For multiple mempools per Rx queue support,
> +	 * rx_pkt_nb_segs greater than 1 and
> +	 * Rx offload flag, RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT won't be set.
> +	 * @see rte_eth_rxconf::rx_mempools

I have a basic question about the feature: do we need rx_pkt_nb_segs > 1
for the feature to work? My understanding is that if multiple mempools
are defined, the driver will steer packets according to their size, even
without splitting packets.
Just for my understanding, thanks :)

> +	 */
> +	if (rx_pkt_nb_segs <= 1 &&
>   	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
>   		rx_conf->rx_seg = NULL;
>   		rx_conf->rx_nseg = 0;
> @@ -2668,20 +2674,30 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   		 */
>   		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
>   		mpx = mbuf_pool_find(socket_id, mp_n);
> -		/* Handle zero as mbuf data buffer size. */
> -		rx_seg->offset = i < rx_pkt_nb_offs ?
> -				   rx_pkt_seg_offsets[i] : 0;
> -		rx_seg->mp = mpx ? mpx : mp;
> -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> +
> +		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> +			/* Handle zero as mbuf data buffer size. */
> +			rx_seg->offset = i < rx_pkt_nb_offs ?
> +					   rx_pkt_seg_offsets[i] : 0;
> +			rx_seg->mp = mpx ? mpx : mp;
> +			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> +				rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> +			} else {
> +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> +						 rx_pkt_seg_lengths[i] :
> +						 mbuf_data_size[mp_n];
> +			}
>   		} else {
> -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> -					rx_pkt_seg_lengths[i] :
> -					mbuf_data_size[mp_n];
> +			rx_mempool[i] = mpx ? mpx : mp;
>   		}
>   	}
> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> -	rx_conf->rx_seg = rx_useg;
> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> +		rx_conf->rx_seg = rx_useg;
> +	} else {
> +		rx_conf->rx_mempools = rx_mempool;
> +		rx_conf->rx_nmempool = rx_pkt_nb_segs;
> +	}
>   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
>   				    socket_id, rx_conf, NULL);
>   	rx_conf->rx_seg = NULL;
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index e65be323b8..14be10dcef 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
>   
>   #define MIN_TOTAL_NUM_MBUFS 1024
>   
> +/* Maximum number of pools supported per Rx queue */
> +#define MAX_MEMPOOL 8

Should we set it to MAX_SEGS_BUFFER_SPLIT to avoid a mismatch?

> +
>   typedef uint8_t  lcoreid_t;
>   typedef uint16_t portid_t;
>   typedef uint16_t queueid_t;
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
> index fd98e8b51d..f9df5f69ef 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
>   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
>   				 print_buf, buf_size, &cur_len);
>   		MKDUMPSTR(print_buf, buf_size, cur_len,
> -			  " - type=0x%04x - length=%u - nb_segs=%d",
> -			  eth_type, (unsigned int) mb->pkt_len,
> +			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
> +			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
>   			  (int)mb->nb_segs);
>   		ol_flags = mb->ol_flags;
>   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v9 1/1] app/testpmd: support mulitiple mbuf pools per Rx queue
  2022-10-21 15:57               ` Singh, Aman Deep
@ 2022-10-24  3:32                 ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-10-24  3:32 UTC (permalink / raw)
  To: Singh, Aman Deep, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Singh, Aman Deep <aman.deep.singh@intel.com>
> Sent: Friday, October 21, 2022 9:28 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Yuying Zhang
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru; thomas@monjalon.net;
> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>
> Subject: [EXT] Re: [PATCH v9 1/1] app/testpmd: support mulitiple mbuf pools per
> Rx queue
> 
> External Email
> 
> ----------------------------------------------------------------------
> 
> 
> On 10/17/2022 2:18 PM, Hanumanth Pothula wrote:
> > Some of the HW has support for choosing memory pools based on the
> > packet's size. The pool sort capability allows PMD/NIC to choose a
> > memory pool based on the packet's length.
> >
> > On multiple mempool support enabled, populate mempool array
> > accordingly. Also, print pool name on which packet is received.
> >
> > Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> > ---
> >   app/test-pmd/testpmd.c | 40 ++++++++++++++++++++++++++++------------
> >   app/test-pmd/testpmd.h |  3 +++
> >   app/test-pmd/util.c    |  4 ++--
> >   3 files changed, 33 insertions(+), 14 deletions(-)
> >
> > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> > 5b0f0838dc..1549551640 100644
> > --- a/app/test-pmd/testpmd.c
> > +++ b/app/test-pmd/testpmd.c
> > @@ -2647,10 +2647,16 @@ rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
> >   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
> >   {
> >   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> > +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> >   	unsigned int i, mp_n;
> >   	int ret;
> >
> > -	if (rx_pkt_nb_segs <= 1 ||
> > +	/* For multiple mempools per Rx queue support,
> > +	 * rx_pkt_nb_segs greater than 1 and
> > +	 * Rx offload flag, RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT won't be set.
> > +	 * @see rte_eth_rxconf::rx_mempools
> 
> I have a basic question about the feature, do we need rx_pkt_nb_segs > 1 for
> feature to work. My understanding is, if multiple mempools are defined the
> driver will move pkts according to its size, even without split of pkts.
> Just for my understanding, Thanks :)
> 
Thanks Aman for the review.

Yes, rx_pkt_nb_segs > 1 is not required for the multi-mempool feature.
rx_pkt_nb_segs holds the number of segments. We need to use mbuf_data_size_n, the total number of mbuf mempools, instead. I will take care of this and upload a new patch-set.

> > +	 */
> > +	if (rx_pkt_nb_segs <= 1 &&
> >   	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
> >   		rx_conf->rx_seg = NULL;
> >   		rx_conf->rx_nseg = 0;
> > @@ -2668,20 +2674,30 @@ rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
> >   		 */
> >   		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> >   		mpx = mbuf_pool_find(socket_id, mp_n);
> > -		/* Handle zero as mbuf data buffer size. */
> > -		rx_seg->offset = i < rx_pkt_nb_offs ?
> > -				   rx_pkt_seg_offsets[i] : 0;
> > -		rx_seg->mp = mpx ? mpx : mp;
> > -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> > -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> > +
> > +		if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)
> {
> > +			/* Handle zero as mbuf data buffer size. */
> > +			rx_seg->offset = i < rx_pkt_nb_offs ?
> > +					   rx_pkt_seg_offsets[i] : 0;
> > +			rx_seg->mp = mpx ? mpx : mp;
> > +			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i]
> == 0) {
> > +				rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> > +			} else {
> > +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> > +						 rx_pkt_seg_lengths[i] :
> > +						 mbuf_data_size[mp_n];
> > +			}
> >   		} else {
> > -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> > -					rx_pkt_seg_lengths[i] :
> > -					mbuf_data_size[mp_n];
> > +			rx_mempool[i] = mpx ? mpx : mp;
> >   		}
> >   	}
> > -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> > -	rx_conf->rx_seg = rx_useg;
> > +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> > +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> > +		rx_conf->rx_seg = rx_useg;
> > +	} else {
> > +		rx_conf->rx_mempools = rx_mempool;
> > +		rx_conf->rx_nmempool = rx_pkt_nb_segs;
> > +	}
> >   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
> >   				    socket_id, rx_conf, NULL);
> >   	rx_conf->rx_seg = NULL;
> > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> > e65be323b8..14be10dcef 100644
> > --- a/app/test-pmd/testpmd.h
> > +++ b/app/test-pmd/testpmd.h
> > @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
> >
> >   #define MIN_TOTAL_NUM_MBUFS 1024
> >
> > +/* Maximum number of pools supported per Rx queue */ #define
> > +MAX_MEMPOOL 8
> 
> Shoud we set it to MAX_SEGS_BUFFER_SPLIT to avoid mismatch.
> 
> > +
> >   typedef uint8_t  lcoreid_t;
> >   typedef uint16_t portid_t;
> >   typedef uint16_t queueid_t;
> > diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> > fd98e8b51d..f9df5f69ef 100644
> > --- a/app/test-pmd/util.c
> > +++ b/app/test-pmd/util.c
> > @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
> >   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
> >   				 print_buf, buf_size, &cur_len);
> >   		MKDUMPSTR(print_buf, buf_size, cur_len,
> > -			  " - type=0x%04x - length=%u - nb_segs=%d",
> > -			  eth_type, (unsigned int) mb->pkt_len,
> > +			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
> > +			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
> >   			  (int)mb->nb_segs);
> >   		ol_flags = mb->ol_flags;
> >   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v10 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-17  8:48             ` [PATCH v9 1/1] " Hanumanth Pothula
  2022-10-21 15:57               ` Singh, Aman Deep
@ 2022-10-24  4:07               ` Hanumanth Pothula
  2022-10-25  1:40                 ` [PATCH v11 " Hanumanth Pothula
  1 sibling, 1 reply; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-24  4:07 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The multiple mempool capability allows the PMD/NIC to
choose a memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v10:
 - Populate multi-mempool array based on mbuf_data_size_n instead
   of rx_pkt_nb_segs.
---
 app/test-pmd/testpmd.c | 62 ++++++++++++++++++++++++++----------------
 app/test-pmd/testpmd.h |  3 ++
 app/test-pmd/util.c    |  4 +--
 3 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 5b0f0838dc..40e8522e49 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2647,11 +2647,17 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
 	unsigned int i, mp_n;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	/* Verify whether the Rx queue configuration uses a single pool and
+	 * segment or multiple pools/segments.
+	 * @see rte_eth_rxconf::rx_mempools
+	 * @see rte_eth_rxconf::rx_seg
+	 */
+	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
+	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT != 0))) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2659,29 +2665,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 					     rx_conf, mp);
 		goto exit;
 	}
-	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
-		struct rte_mempool *mpx;
-		/*
-		 * Use last valid pool for the segments with number
-		 * exceeding the pool index.
-		 */
-		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
-		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
-		} else {
-			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		for (i = 0; i < rx_pkt_nb_segs; i++) {
+			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+			/*
+			 * Use last valid pool for the segments with number
+			 * exceeding the pool index.
+			 */
+			mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
+			mpx = mbuf_pool_find(socket_id, mp_n);
+			if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+				/**
+				 * On segment length zero, set the length to
+				 *      buffer size - headroom size
+				 * to make sure enough space is accommodated for the header.
+				 */
+				rx_seg->length = rx_pkt_seg_lengths[i] ?
+						 rx_pkt_seg_lengths[i] :
+						 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+				rx_seg->offset = i < rx_pkt_nb_offs ?
+						 rx_pkt_seg_offsets[i] : 0;
+				rx_seg->mp = mpx ? mpx : mp;
+			}
+		}
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		for (i = 0; i < mbuf_data_size_n; i++) {
+			mpx = mbuf_pool_find(socket_id, i);
+			rx_mempool[i] = mpx ? mpx : mp;
 		}
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = mbuf_data_size_n;
 	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e65be323b8..14be10dcef 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1
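
As a usage sketch (command-line values are hypothetical): with this
revision the number of Rx mempools handed to the driver follows the
number of --mbuf-size values, so a multi-mempool run, on a driver
reporting max_rx_mempools >= 3, could look like:

  dpdk-testpmd -l 0-1 -n 4 -- -i --mbuf-size=2048,4096,9216
  testpmd> set verbose 1
  testpmd> start

Three mempools are created and passed via rx_conf->rx_mempools
(rx_nmempool == 3), and verbose mode prints the pool each packet was
received on.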


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-24  4:07               ` [PATCH v10 1/1] app/testpmd: support multiple " Hanumanth Pothula
@ 2022-10-25  1:40                 ` Hanumanth Pothula
  2022-11-01 14:13                   ` Hanumanth Reddy Pothula
                                     ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-10-25  1:40 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The multiple mempool capability allows the PMD/NIC to
choose a memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
v11:
 - Resolve compilation errors and warnings.
v10:
 - Populate multi-mempool array based on mbuf_data_size_n instead
   of rx_pkt_nb_segs.
---
 app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++---------------
 app/test-pmd/testpmd.h |  3 ++
 app/test-pmd/util.c    |  4 +--
 3 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 5b0f0838dc..62f7c9dba8 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
+	struct rte_mempool *mpx;
 	unsigned int i, mp_n;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	/* Verify Rx queue configuration is single pool and segment or
+	 * multiple pool/segment.
+	 * @see rte_eth_rxconf::rx_mempools
+	 * @see rte_eth_rxconf::rx_seg
+	 */
+	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
+	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2659,29 +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 					     rx_conf, mp);
 		goto exit;
 	}
-	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
-		struct rte_mempool *mpx;
-		/*
-		 * Use last valid pool for the segments with number
-		 * exceeding the pool index.
-		 */
-		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
-		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
-		} else {
-			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		for (i = 0; i < rx_pkt_nb_segs; i++) {
+			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+			/*
+			 * Use last valid pool for the segments with number
+			 * exceeding the pool index.
+			 */
+			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
+			mpx = mbuf_pool_find(socket_id, mp_n);
+			if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+				/**
+				 * On segment length zero, update the length as
+				 *      buffer size - headroom size
+				 * to make sure enough space is accommodated for the header.
+				 */
+				rx_seg->length = rx_pkt_seg_lengths[i] ?
+						 rx_pkt_seg_lengths[i] :
+						 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
+				rx_seg->offset = i < rx_pkt_nb_offs ?
+						 rx_pkt_seg_offsets[i] : 0;
+				rx_seg->mp = mpx ? mpx : mp;
+			}
+		}
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		for (i = 0; i < mbuf_data_size_n; i++) {
+			mpx = mbuf_pool_find(socket_id, i);
+			rx_mempool[i] = mpx ? mpx : mp;
 		}
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = mbuf_data_size_n;
 	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e65be323b8..14be10dcef 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread
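For comparison with the buffer split branch above, a minimal sketch
of a split configuration (names assumed, not from the patch; the
RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT offload must also have been enabled
at device configure time):

#include <string.h>
#include <rte_ethdev.h>

/* Illustrative only: split every received packet into a 128-byte
 * first segment from hdr_pool and the remainder from pay_pool. */
static int
setup_rxq_buffer_split(uint16_t port_id, uint16_t queue_id,
		       uint16_t nb_desc, unsigned int socket_id,
		       struct rte_mempool *hdr_pool,
		       struct rte_mempool *pay_pool)
{
	union rte_eth_rxseg segs[2];
	struct rte_eth_rxconf rx_conf;

	memset(segs, 0, sizeof(segs));
	memset(&rx_conf, 0, sizeof(rx_conf));

	segs[0].split.mp = hdr_pool;
	segs[0].split.length = 128;	/* fixed-size first segment */
	segs[1].split.mp = pay_pool;
	segs[1].split.length = 0;	/* 0: derive from the pool's buffer size */

	rx_conf.offloads = RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
	rx_conf.rx_seg = segs;
	rx_conf.rx_nseg = 2;

	return rte_eth_rx_queue_setup(port_id, queue_id, nb_desc,
				      socket_id, &rx_conf, NULL);
}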

* RE: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-25  1:40                 ` [PATCH v11 " Hanumanth Pothula
@ 2022-11-01 14:13                   ` Hanumanth Reddy Pothula
  2022-11-03 12:15                   ` Singh, Aman Deep
  2022-11-07  5:31                   ` [PATCH v12 " Hanumanth Pothula
  2 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-11-01 14:13 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

Ping

> -----Original Message-----
> From: Hanumanth Pothula <hpothula@marvell.com>
> Sent: Tuesday, October 25, 2022 7:10 AM
> To: Aman Singh <aman.deep.singh@intel.com>; Yuying Zhang
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru; thomas@monjalon.net;
> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>; Hanumanth Reddy Pothula
> <hpothula@marvell.com>
> Subject: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx
> queue
> 
> Some HW supports choosing a memory pool based on the packet's size. The
> pool sort capability allows the PMD/NIC to choose a memory pool based on
> the packet's length.
> 
> When multiple mempool support is enabled, populate the mempool array
> accordingly. Also, print the name of the pool on which each packet is received.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> v11:
>  - Resolve compilation errors and warnings.
> v10:
>  - Populate multi-mempool array based on mbuf_data_size_n instead
>    of rx_pkt_nb_segs.
> ---
>  app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++---------------
>  app/test-pmd/testpmd.h |  3 ++
>  app/test-pmd/util.c    |  4 +--
>  3 files changed, 45 insertions(+), 25 deletions(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> 5b0f0838dc..62f7c9dba8 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
>  	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)  {
>  	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> +	struct rte_mempool *mpx;
>  	unsigned int i, mp_n;
>  	int ret;
> 
> -	if (rx_pkt_nb_segs <= 1 ||
> -	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
> +	/* Verify Rx queue configuration is single pool and segment or
> +	 * multiple pool/segment.
> +	 * @see rte_eth_rxconf::rx_mempools
> +	 * @see rte_eth_rxconf::rx_seg
> +	 */
> +	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
> +	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
>  		rx_conf->rx_seg = NULL;
>  		rx_conf->rx_nseg = 0;
>  		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, @@ -
> 2659,29 +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>  					     rx_conf, mp);
>  		goto exit;
>  	}
> -	for (i = 0; i < rx_pkt_nb_segs; i++) {
> -		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> -		struct rte_mempool *mpx;
> -		/*
> -		 * Use last valid pool for the segments with number
> -		 * exceeding the pool index.
> -		 */
> -		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> -		mpx = mbuf_pool_find(socket_id, mp_n);
> -		/* Handle zero as mbuf data buffer size. */
> -		rx_seg->offset = i < rx_pkt_nb_offs ?
> -				   rx_pkt_seg_offsets[i] : 0;
> -		rx_seg->mp = mpx ? mpx : mp;
> -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> -		} else {
> -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> -					rx_pkt_seg_lengths[i] :
> -					mbuf_data_size[mp_n];
> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> +		for (i = 0; i < rx_pkt_nb_segs; i++) {
> +			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> +			/*
> +			 * Use last valid pool for the segments with number
> +			 * exceeding the pool index.
> +			 */
> +			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 :
> i;
> +			mpx = mbuf_pool_find(socket_id, mp_n);
> +			if (rx_conf->offloads &
> RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> +				/**
> +				 * On segment length zero, update the length as
> +				 *      buffer size - headroom size
> +				 * to make sure enough space is accommodated for
> the header.
> +				 */
> +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> +						 rx_pkt_seg_lengths[i] :
> +						 mbuf_data_size[mp_n] -
> RTE_PKTMBUF_HEADROOM;
> +				rx_seg->offset = i < rx_pkt_nb_offs ?
> +						 rx_pkt_seg_offsets[i] : 0;
> +				rx_seg->mp = mpx ? mpx : mp;
> +			}
> +		}
> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> +		rx_conf->rx_seg = rx_useg;
> +	} else {
> +		for (i = 0; i < mbuf_data_size_n; i++) {
> +			mpx = mbuf_pool_find(socket_id, i);
> +			rx_mempool[i] = mpx ? mpx : mp;
>  		}
> +		rx_conf->rx_mempools = rx_mempool;
> +		rx_conf->rx_nmempool = mbuf_data_size_n;
>  	}
> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> -	rx_conf->rx_seg = rx_useg;
>  	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
>  				    socket_id, rx_conf, NULL);
>  	rx_conf->rx_seg = NULL;
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> e65be323b8..14be10dcef 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
> 
>  #define MIN_TOTAL_NUM_MBUFS 1024
> 
> +/* Maximum number of pools supported per Rx queue */ #define
> +MAX_MEMPOOL 8
> +
>  typedef uint8_t  lcoreid_t;
>  typedef uint16_t portid_t;
>  typedef uint16_t queueid_t;
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> fd98e8b51d..f9df5f69ef 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
>  				 print_buf, buf_size, &cur_len);
>  		MKDUMPSTR(print_buf, buf_size, cur_len,
> -			  " - type=0x%04x - length=%u - nb_segs=%d",
> -			  eth_type, (unsigned int) mb->pkt_len,
> +			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
> +			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
>  			  (int)mb->nb_segs);
>  		ol_flags = mb->ol_flags;
>  		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-25  1:40                 ` [PATCH v11 " Hanumanth Pothula
  2022-11-01 14:13                   ` Hanumanth Reddy Pothula
@ 2022-11-03 12:15                   ` Singh, Aman Deep
  2022-11-03 12:36                     ` [EXT] " Hanumanth Reddy Pothula
  2022-11-07  5:31                   ` [PATCH v12 " Hanumanth Pothula
  2 siblings, 1 reply; 75+ messages in thread
From: Singh, Aman Deep @ 2022-11-03 12:15 UTC (permalink / raw)
  To: Hanumanth Pothula, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram



On 10/25/2022 7:10 AM, Hanumanth Pothula wrote:
> Some HW supports choosing a memory pool based on the packet's
> size. The pool sort capability allows the PMD/NIC to choose a
> memory pool based on the packet's length.
>
> When multiple mempool support is enabled, populate the mempool
> array accordingly. Also, print the name of the pool on which each
> packet is received.
>
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> v11:
>   - Resolve compilation errors and warnings.
> v10:
>   - Populate multi-mempool array based on mbuf_data_size_n instead
>     of rx_pkt_nb_segs.
> ---
>   app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++---------------
>   app/test-pmd/testpmd.h |  3 ++
>   app/test-pmd/util.c    |  4 +--
>   3 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 5b0f0838dc..62f7c9dba8 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
>   {
>   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> +	struct rte_mempool *mpx;
>   	unsigned int i, mp_n;
>   	int ret;
>   
> -	if (rx_pkt_nb_segs <= 1 ||
> -	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
> +	/* Verify Rx queue configuration is single pool and segment or
> +	 * multiple pool/segment.
> +	 * @see rte_eth_rxconf::rx_mempools
> +	 * @see rte_eth_rxconf::rx_seg
> +	 */
> +	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
> +	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
>   		rx_conf->rx_seg = NULL;
>   		rx_conf->rx_nseg = 0;
>   		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
> @@ -2659,29 +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   					     rx_conf, mp);
>   		goto exit;
>   	}
> -	for (i = 0; i < rx_pkt_nb_segs; i++) {
> -		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> -		struct rte_mempool *mpx;
> -		/*
> -		 * Use last valid pool for the segments with number
> -		 * exceeding the pool index.
> -		 */
> -		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> -		mpx = mbuf_pool_find(socket_id, mp_n);
> -		/* Handle zero as mbuf data buffer size. */
> -		rx_seg->offset = i < rx_pkt_nb_offs ?
> -				   rx_pkt_seg_offsets[i] : 0;
> -		rx_seg->mp = mpx ? mpx : mp;
> -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> -		} else {
> -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> -					rx_pkt_seg_lengths[i] :
> -					mbuf_data_size[mp_n];
> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {

In case the *_OFFLOAD_BUFFER_SPLIT flag is not set but rx_pkt_nb_segs > 1,
will it still enter the loop below, as before?

> +		for (i = 0; i < rx_pkt_nb_segs; i++) {
> +			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> +			/*
> +			 * Use last valid pool for the segments with number
> +			 * exceeding the pool index.
> +			 */
> +			mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> +			mpx = mbuf_pool_find(socket_id, mp_n);
> +			if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {

Isn't the above check already known to be TRUE before we reach here?

> +				/**
> +				 * On segment length zero, update the length as
> +				 *      buffer size - headroom size
> +				 * to make sure enough space is accommodated for the header.
> +				 */
> +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> +						 rx_pkt_seg_lengths[i] :
> +						 mbuf_data_size[mp_n] - RTE_PKTMBUF_HEADROOM;
> +				rx_seg->offset = i < rx_pkt_nb_offs ?
> +						 rx_pkt_seg_offsets[i] : 0;
> +				rx_seg->mp = mpx ? mpx : mp;
> +			}
> +		}
> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> +		rx_conf->rx_seg = rx_useg;
> +	} else {
> +		for (i = 0; i < mbuf_data_size_n; i++) {
> +			mpx = mbuf_pool_find(socket_id, i);
> +			rx_mempool[i] = mpx ? mpx : mp;
>   		}
> +		rx_conf->rx_mempools = rx_mempool;
> +		rx_conf->rx_nmempool = mbuf_data_size_n;
>   	}
> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> -	rx_conf->rx_seg = rx_useg;
>   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
>   				    socket_id, rx_conf, NULL);
>   	rx_conf->rx_seg = NULL;
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index e65be323b8..14be10dcef 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
>   
>   #define MIN_TOTAL_NUM_MBUFS 1024
>   
> +/* Maximum number of pools supported per Rx queue */
> +#define MAX_MEMPOOL 8
> +
>   typedef uint8_t  lcoreid_t;
>   typedef uint16_t portid_t;
>   typedef uint16_t queueid_t;
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
> index fd98e8b51d..f9df5f69ef 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
>   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
>   				 print_buf, buf_size, &cur_len);
>   		MKDUMPSTR(print_buf, buf_size, cur_len,
> -			  " - type=0x%04x - length=%u - nb_segs=%d",
> -			  eth_type, (unsigned int) mb->pkt_len,
> +			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
> +			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
>   			  (int)mb->nb_segs);
>   		ol_flags = mb->ol_flags;
>   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-03 12:15                   ` Singh, Aman Deep
@ 2022-11-03 12:36                     ` Hanumanth Reddy Pothula
  2022-11-03 15:20                       ` Singh, Aman Deep
  0 siblings, 1 reply; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-11-03 12:36 UTC (permalink / raw)
  To: Singh, Aman Deep, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Singh, Aman Deep <aman.deep.singh@intel.com>
> Sent: Thursday, November 3, 2022 5:46 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Yuying Zhang
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru; thomas@monjalon.net;
> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>
> Subject: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools
> per Rx queue
> 
> 
> On 10/25/2022 7:10 AM, Hanumanth Pothula wrote:
> > Some HW supports choosing a memory pool based on the packet's
> > size. The pool sort capability allows the PMD/NIC to choose a
> > memory pool based on the packet's length.
> >
> > When multiple mempool support is enabled, populate the mempool
> > array accordingly. Also, print the name of the pool on which each
> > packet is received.
> >
> > Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> > v11:
> >   - Resolve compilation errors and warnings.
> > v10:
> >   - Populate multi-mempool array based on mbuf_data_size_n instead
> >     of rx_pkt_nb_segs.
> > ---
> >   app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++---------------
> >   app/test-pmd/testpmd.h |  3 ++
> >   app/test-pmd/util.c    |  4 +--
> >   3 files changed, 45 insertions(+), 25 deletions(-)
> >
> > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> > 5b0f0838dc..62f7c9dba8 100644
> > --- a/app/test-pmd/testpmd.c
> > +++ b/app/test-pmd/testpmd.c
> > @@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
> >   	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
> >   {
> >   	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> > +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> > +	struct rte_mempool *mpx;
> >   	unsigned int i, mp_n;
> >   	int ret;
> >
> > -	if (rx_pkt_nb_segs <= 1 ||
> > -	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
> > +	/* Verify Rx queue configuration is single pool and segment or
> > +	 * multiple pool/segment.
> > +	 * @see rte_eth_rxconf::rx_mempools
> > +	 * @see rte_eth_rxconf::rx_seg
> > +	 */
> > +	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
> > +	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
> >   		rx_conf->rx_seg = NULL;
> >   		rx_conf->rx_nseg = 0;
> >   		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, @@ -
> 2659,29
> > +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >   					     rx_conf, mp);
> >   		goto exit;
> >   	}
> > -	for (i = 0; i < rx_pkt_nb_segs; i++) {
> > -		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> > -		struct rte_mempool *mpx;
> > -		/*
> > -		 * Use last valid pool for the segments with number
> > -		 * exceeding the pool index.
> > -		 */
> > -		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> > -		mpx = mbuf_pool_find(socket_id, mp_n);
> > -		/* Handle zero as mbuf data buffer size. */
> > -		rx_seg->offset = i < rx_pkt_nb_offs ?
> > -				   rx_pkt_seg_offsets[i] : 0;
> > -		rx_seg->mp = mpx ? mpx : mp;
> > -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
> > -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> > -		} else {
> > -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> > -					rx_pkt_seg_lengths[i] :
> > -					mbuf_data_size[mp_n];
> > +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> 
> In case the *_OFFLOAD_BUFFER_SPLIT flag is not set but rx_pkt_nb_segs > 1,
> will it still enter the loop below, as before?

Yes Aman, the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag has to be set to proceed further.
Do you suggest entering the loop on rx_pkt_nb_segs > 1 irrespective of the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag?
Something like,
if (rx_pkt_nb_segs > 1) {
	for (i = 0; i < rx_pkt_nb_segs; i++) {
	}
}
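
For reference, the dispatch the series converges on (see v12 later in
the thread) can be written compactly; the helper below and its names
are illustrative only, not part of any patch:

#include <stdbool.h>
#include <stdint.h>
#include <rte_ethdev.h>

enum rxq_mode { RXQ_SINGLE, RXQ_BUFFER_SPLIT, RXQ_MULTI_POOL };

/* Illustrative helper: which Rx queue setup path applies for a given
 * pool count, segment count and offload mask. */
static enum rxq_mode
classify_rxq(unsigned int nb_pools, unsigned int nb_segs, uint64_t offloads)
{
	bool split = (offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0;

	if (nb_pools <= 1 && nb_segs <= 1 && !split)
		return RXQ_SINGLE;		/* single pool, single segment */
	if (nb_segs > 1 || split)
		return RXQ_BUFFER_SPLIT;	/* rx_seg/rx_nseg path */
	return RXQ_MULTI_POOL;			/* rx_mempools/rx_nmempool path */
}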

> 
> > +		for (i = 0; i < rx_pkt_nb_segs; i++) {
> > +			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> > +			/*
> > +			 * Use last valid pool for the segments with number
> > +			 * exceeding the pool index.
> > +			 */
> > +			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 :
> i;
> > +			mpx = mbuf_pool_find(socket_id, mp_n);
> > +			if (rx_conf->offloads &
> RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> 
> Isn't the above check already known to be TRUE before we reach here?
Yes, this check is redundant; will remove it.
> 
> > +				/**
> > +				 * On segment length zero, update the length as
> > +				 *      buffer size - headroom size
> > +				 * to make sure enough space is accommodated for
> the header.
> > +				 */
> > +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> > +						 rx_pkt_seg_lengths[i] :
> > +						 mbuf_data_size[mp_n] -
> RTE_PKTMBUF_HEADROOM;
> > +				rx_seg->offset = i < rx_pkt_nb_offs ?
> > +						 rx_pkt_seg_offsets[i] : 0;
> > +				rx_seg->mp = mpx ? mpx : mp;
> > +			}
> > +		}
> > +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> > +		rx_conf->rx_seg = rx_useg;
> > +	} else {
> > +		for (i = 0; i < mbuf_data_size_n; i++) {
> > +			mpx = mbuf_pool_find(socket_id, i);
> > +			rx_mempool[i] = mpx ? mpx : mp;
> >   		}
> > +		rx_conf->rx_mempools = rx_mempool;
> > +		rx_conf->rx_nmempool = mbuf_data_size_n;
> >   	}
> > -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> > -	rx_conf->rx_seg = rx_useg;
> >   	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
> >   				    socket_id, rx_conf, NULL);
> >   	rx_conf->rx_seg = NULL;
> > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> > e65be323b8..14be10dcef 100644
> > --- a/app/test-pmd/testpmd.h
> > +++ b/app/test-pmd/testpmd.h
> > @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
> >
> >   #define MIN_TOTAL_NUM_MBUFS 1024
> >
> > +/* Maximum number of pools supported per Rx queue */ #define
> > +MAX_MEMPOOL 8
> > +
> >   typedef uint8_t  lcoreid_t;
> >   typedef uint16_t portid_t;
> >   typedef uint16_t queueid_t;
> > diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> > fd98e8b51d..f9df5f69ef 100644
> > --- a/app/test-pmd/util.c
> > +++ b/app/test-pmd/util.c
> > @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
> >   		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
> >   				 print_buf, buf_size, &cur_len);
> >   		MKDUMPSTR(print_buf, buf_size, cur_len,
> > -			  " - type=0x%04x - length=%u - nb_segs=%d",
> > -			  eth_type, (unsigned int) mb->pkt_len,
> > +			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
> > +			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
> >   			  (int)mb->nb_segs);
> >   		ol_flags = mb->ol_flags;
> >   		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread
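To see the effect of the util.c change, the originating pool of each
received packet is available via mbuf->pool; a minimal sketch
(assumed setup, names illustrative):

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative only: print the pool each received packet came from,
 * mirroring the "pool=<name>" field added to dump_pkt_burst(). */
static void
print_rx_pools(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
	for (i = 0; i < nb; i++) {
		printf("len=%u pool=%s\n",
		       (unsigned int)pkts[i]->pkt_len,
		       pkts[i]->pool->name);
		rte_pktmbuf_free(pkts[i]);
	}
}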

* Re: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-03 12:36                     ` [EXT] " Hanumanth Reddy Pothula
@ 2022-11-03 15:20                       ` Singh, Aman Deep
  2022-11-04 15:38                         ` Hanumanth Reddy Pothula
  0 siblings, 1 reply; 75+ messages in thread
From: Singh, Aman Deep @ 2022-11-03 15:20 UTC (permalink / raw)
  To: Hanumanth Reddy Pothula, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



On 11/3/2022 6:06 PM, Hanumanth Reddy Pothula wrote:
>
>> -----Original Message-----
>> From: Singh, Aman Deep <aman.deep.singh@intel.com>
>> Sent: Thursday, November 3, 2022 5:46 PM
>> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Yuying Zhang
>> <yuying.zhang@intel.com>
>> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru; thomas@monjalon.net;
>> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar Dabilpuram
>> <ndabilpuram@marvell.com>
>> Subject: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools
>> per Rx queue
>>
>>
>> On 10/25/2022 7:10 AM, Hanumanth Pothula wrote:
>>> Some HW supports choosing a memory pool based on the packet's
>>> size. The pool sort capability allows the PMD/NIC to choose a
>>> memory pool based on the packet's length.
>>>
>>> When multiple mempool support is enabled, populate the mempool
>>> array accordingly. Also, print the name of the pool on which each
>>> packet is received.
>>>
>>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>>> v11:
>>>    - Resolve compilation errors and warnings.
>>> v10:
>>>    - Populate multi-mempool array based on mbuf_data_size_n instead
>>>      of rx_pkt_nb_segs.
>>> ---
>>>    app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++---------------
>>>    app/test-pmd/testpmd.h |  3 ++
>>>    app/test-pmd/util.c    |  4 +--
>>>    3 files changed, 45 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
>>> 5b0f0838dc..62f7c9dba8 100644
>>> --- a/app/test-pmd/testpmd.c
>>> +++ b/app/test-pmd/testpmd.c
>>> @@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id, uint16_t
>> rx_queue_id,
>>>    	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
>>>    {
>>>    	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
>>> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
>>> +	struct rte_mempool *mpx;
>>>    	unsigned int i, mp_n;
>>>    	int ret;
>>>
>>> -	if (rx_pkt_nb_segs <= 1 ||
>>> -	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
>>> +	/* Verify Rx queue configuration is single pool and segment or
>>> +	 * multiple pool/segment.
>>> +	 * @see rte_eth_rxconf::rx_mempools
>>> +	 * @see rte_eth_rxconf::rx_seg
>>> +	 */
>>> +	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
>>> +	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
>>>    		rx_conf->rx_seg = NULL;
>>>    		rx_conf->rx_nseg = 0;
>>>    		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, @@ -
>> 2659,29
>>> +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>>    					     rx_conf, mp);
>>>    		goto exit;
>>>    	}
>>> -	for (i = 0; i < rx_pkt_nb_segs; i++) {
>>> -		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
>>> -		struct rte_mempool *mpx;
>>> -		/*
>>> -		 * Use last valid pool for the segments with number
>>> -		 * exceeding the pool index.
>>> -		 */
>>> -		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
>>> -		mpx = mbuf_pool_find(socket_id, mp_n);
>>> -		/* Handle zero as mbuf data buffer size. */
>>> -		rx_seg->offset = i < rx_pkt_nb_offs ?
>>> -				   rx_pkt_seg_offsets[i] : 0;
>>> -		rx_seg->mp = mpx ? mpx : mp;
>>> -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
>>> -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
>>> -		} else {
>>> -			rx_seg->length = rx_pkt_seg_lengths[i] ?
>>> -					rx_pkt_seg_lengths[i] :
>>> -					mbuf_data_size[mp_n];
>>> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>> In case the *_OFFLOAD_BUFFER_SPLIT flag is not set but rx_pkt_nb_segs > 1,
>> will it still enter the loop below, as before?
> Yes Aman, the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag has to be set to proceed further.
> Do you suggest entering the loop on rx_pkt_nb_segs > 1 irrespective of the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag?
> Something like,
> if (rx_pkt_nb_segs > 1) {
> 	for (i = 0; i < rx_pkt_nb_segs; i++) {
> 	}
> }

As per the old logic, either of the cases was supported:
if (rx_pkt_nb_segs <= 1 ||
(rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0)

>
>>> +		for (i = 0; i < rx_pkt_nb_segs; i++) {
>>> +			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
>>> +			/*
>>> +			 * Use last valid pool for the segments with number
>>> +			 * exceeding the pool index.
>>> +			 */
>>> +			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 :
>> i;
>>> +			mpx = mbuf_pool_find(socket_id, mp_n);
>>> +			if (rx_conf->offloads &
>> RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
>>
>> Isn't the above check already known to be TRUE before we reach here?
> Yes, this check is redundant; will remove it.
>>> +				/**
>>> +				 * On segment length zero, update the length as
>>> +				 *      buffer size - headroom size
>>> +				 * to make sure enough space is accommodated for
>> the header.
>>> +				 */
>>> +				rx_seg->length = rx_pkt_seg_lengths[i] ?
>>> +						 rx_pkt_seg_lengths[i] :
>>> +						 mbuf_data_size[mp_n] -
>> RTE_PKTMBUF_HEADROOM;
>>> +				rx_seg->offset = i < rx_pkt_nb_offs ?
>>> +						 rx_pkt_seg_offsets[i] : 0;
>>> +				rx_seg->mp = mpx ? mpx : mp;
>>> +			}
>>> +		}
>>> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
>>> +		rx_conf->rx_seg = rx_useg;
>>> +	} else {
>>> +		for (i = 0; i < mbuf_data_size_n; i++) {
>>> +			mpx = mbuf_pool_find(socket_id, i);
>>> +			rx_mempool[i] = mpx ? mpx : mp;
>>>    		}
>>> +		rx_conf->rx_mempools = rx_mempool;
>>> +		rx_conf->rx_nmempool = mbuf_data_size_n;
>>>    	}
>>> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
>>> -	rx_conf->rx_seg = rx_useg;
>>>    	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
>>>    				    socket_id, rx_conf, NULL);
>>>    	rx_conf->rx_seg = NULL;
>>> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
>>> e65be323b8..14be10dcef 100644
>>> --- a/app/test-pmd/testpmd.h
>>> +++ b/app/test-pmd/testpmd.h
>>> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
>>>
>>>    #define MIN_TOTAL_NUM_MBUFS 1024
>>>
>>> +/* Maximum number of pools supported per Rx queue */ #define
>>> +MAX_MEMPOOL 8
>>> +
>>>    typedef uint8_t  lcoreid_t;
>>>    typedef uint16_t portid_t;
>>>    typedef uint16_t queueid_t;
>>> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
>>> fd98e8b51d..f9df5f69ef 100644
>>> --- a/app/test-pmd/util.c
>>> +++ b/app/test-pmd/util.c
>>> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
>> struct rte_mbuf *pkts[],
>>>    		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
>>>    				 print_buf, buf_size, &cur_len);
>>>    		MKDUMPSTR(print_buf, buf_size, cur_len,
>>> -			  " - type=0x%04x - length=%u - nb_segs=%d",
>>> -			  eth_type, (unsigned int) mb->pkt_len,
>>> +			  " - pool=%s - type=0x%04x - length=%u -
>> nb_segs=%d",
>>> +			  mb->pool->name, eth_type, (unsigned int) mb-
>>> pkt_len,
>>>    			  (int)mb->nb_segs);
>>>    		ol_flags = mb->ol_flags;
>>>    		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-03 15:20                       ` Singh, Aman Deep
@ 2022-11-04 15:38                         ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-11-04 15:38 UTC (permalink / raw)
  To: Singh, Aman Deep, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Singh, Aman Deep <aman.deep.singh@intel.com>
> Sent: Thursday, November 3, 2022 8:50 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Yuying Zhang
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru;
> thomas@monjalon.net; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple
> mbuf pools per Rx queue
> 
> 
> 
> On 11/3/2022 6:06 PM, Hanumanth Reddy Pothula wrote:
> >
> >> -----Original Message-----
> >> From: Singh, Aman Deep <aman.deep.singh@intel.com>
> >> Sent: Thursday, November 3, 2022 5:46 PM
> >> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Yuying Zhang
> >> <yuying.zhang@intel.com>
> >> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru;
> thomas@monjalon.net;
> >> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar
> >> Dabilpuram <ndabilpuram@marvell.com>
> >> Subject: [EXT] Re: [PATCH v11 1/1] app/testpmd: support multiple mbuf
> >> pools per Rx queue
> >>
> >>
> >> On 10/25/2022 7:10 AM, Hanumanth Pothula wrote:
> >>> Some HW supports choosing a memory pool based on the packet's
> >>> size. The pool sort capability allows the PMD/NIC to choose a
> >>> memory pool based on the packet's length.
> >>>
> >>> When multiple mempool support is enabled, populate the mempool
> >>> array accordingly. Also, print the name of the pool on which each
> >>> packet is received.
> >>>
> >>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >>> v11:
> >>>    - Resolve compilation errors and warnings.
> >>> v10:
> >>>    - Populate multi-mempool array based on mbuf_data_size_n instead
> >>>      of rx_pkt_nb_segs.
> >>> ---
> >>>    app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++++++------
> ---------
> >>>    app/test-pmd/testpmd.h |  3 ++
> >>>    app/test-pmd/util.c    |  4 +--
> >>>    3 files changed, 45 insertions(+), 25 deletions(-)
> >>>
> >>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> >>> 5b0f0838dc..62f7c9dba8 100644
> >>> --- a/app/test-pmd/testpmd.c
> >>> +++ b/app/test-pmd/testpmd.c
> >>> @@ -2647,11 +2647,18 @@ rx_queue_setup(uint16_t port_id,
> uint16_t
> >> rx_queue_id,
> >>>    	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
> >>>    {
> >>>    	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
> >>> +	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
> >>> +	struct rte_mempool *mpx;
> >>>    	unsigned int i, mp_n;
> >>>    	int ret;
> >>>
> >>> -	if (rx_pkt_nb_segs <= 1 ||
> >>> -	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) ==
> 0) {
> >>> +	/* Verify Rx queue configuration is single pool and segment or
> >>> +	 * multiple pool/segment.
> >>> +	 * @see rte_eth_rxconf::rx_mempools
> >>> +	 * @see rte_eth_rxconf::rx_seg
> >>> +	 */
> >>> +	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
> >>> +	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) !=
> 0)))
> >>> +{
> >>>    		rx_conf->rx_seg = NULL;
> >>>    		rx_conf->rx_nseg = 0;
> >>>    		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, @@ -
> >> 2659,29
> >>> +2666,39 @@ rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
> >>>    					     rx_conf, mp);
> >>>    		goto exit;
> >>>    	}
> >>> -	for (i = 0; i < rx_pkt_nb_segs; i++) {
> >>> -		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
> >>> -		struct rte_mempool *mpx;
> >>> -		/*
> >>> -		 * Use last valid pool for the segments with number
> >>> -		 * exceeding the pool index.
> >>> -		 */
> >>> -		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
> >>> -		mpx = mbuf_pool_find(socket_id, mp_n);
> >>> -		/* Handle zero as mbuf data buffer size. */
> >>> -		rx_seg->offset = i < rx_pkt_nb_offs ?
> >>> -				   rx_pkt_seg_offsets[i] : 0;
> >>> -		rx_seg->mp = mpx ? mpx : mp;
> >>> -		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0)
> {
> >>> -			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
> >>> -		} else {
> >>> -			rx_seg->length = rx_pkt_seg_lengths[i] ?
> >>> -					rx_pkt_seg_lengths[i] :
> >>> -					mbuf_data_size[mp_n];
> >>> +	if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> >> In case the *_OFFLOAD_BUFFER_SPLIT flag is not set but
> >> rx_pkt_nb_segs > 1, will it still enter the loop below, as before?
> > Yes Aman, the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag has to be set to
> > proceed further.
> > Do you suggest entering the loop on rx_pkt_nb_segs > 1 irrespective of
> > the RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT flag?
> > Something like,
> > if (rx_pkt_nb_segs > 1) {
> > 	for (i = 0; i < rx_pkt_nb_segs; i++) {
> > 	}
> > }
> 
> As per the old logic, either of the cases was supported:
> if (rx_pkt_nb_segs <= 1 ||
>     (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0)
> 


Yes, will update accordingly.

> >
> >>> +		for (i = 0; i < rx_pkt_nb_segs; i++) {
> >>> +			struct rte_eth_rxseg_split *rx_seg =
> &rx_useg[i].split;
> >>> +			/*
> >>> +			 * Use last valid pool for the segments with number
> >>> +			 * exceeding the pool index.
> >>> +			 */
> >>> +			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 :
> >>> i;
> >>> +			mpx = mbuf_pool_find(socket_id, mp_n);
> >>> +			if (rx_conf->offloads &
> >> RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
> >>
> >> Isn't the above check already known to be TRUE before we reach here?
> > Yes, this check is redundant; will remove it.
> >>> +				/**
> >>> +				 * On segment length zero, update the length as
> >>> +				 *      buffer size - headroom size
> >>> +				 * to make sure enough space is accommodated
> for
> >> the header.
> >>> +				 */
> >>> +				rx_seg->length = rx_pkt_seg_lengths[i] ?
> >>> +						 rx_pkt_seg_lengths[i] :
> >>> +						 mbuf_data_size[mp_n] -
> >> RTE_PKTMBUF_HEADROOM;
> >>> +				rx_seg->offset = i < rx_pkt_nb_offs ?
> >>> +						 rx_pkt_seg_offsets[i] : 0;
> >>> +				rx_seg->mp = mpx ? mpx : mp;
> >>> +			}
> >>> +		}
> >>> +		rx_conf->rx_nseg = rx_pkt_nb_segs;
> >>> +		rx_conf->rx_seg = rx_useg;
> >>> +	} else {
> >>> +		for (i = 0; i < mbuf_data_size_n; i++) {
> >>> +			mpx = mbuf_pool_find(socket_id, i);
> >>> +			rx_mempool[i] = mpx ? mpx : mp;
> >>>    		}
> >>> +		rx_conf->rx_mempools = rx_mempool;
> >>> +		rx_conf->rx_nmempool = mbuf_data_size_n;
> >>>    	}
> >>> -	rx_conf->rx_nseg = rx_pkt_nb_segs;
> >>> -	rx_conf->rx_seg = rx_useg;
> >>>    	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
> >>>    				    socket_id, rx_conf, NULL);
> >>>    	rx_conf->rx_seg = NULL;
> >>> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> >>> e65be323b8..14be10dcef 100644
> >>> --- a/app/test-pmd/testpmd.h
> >>> +++ b/app/test-pmd/testpmd.h
> >>> @@ -80,6 +80,9 @@ extern uint8_t cl_quit;
> >>>
> >>>    #define MIN_TOTAL_NUM_MBUFS 1024
> >>>
> >>> +/* Maximum number of pools supported per Rx queue */ #define
> >>> +MAX_MEMPOOL 8
> >>> +
> >>>    typedef uint8_t  lcoreid_t;
> >>>    typedef uint16_t portid_t;
> >>>    typedef uint16_t queueid_t;
> >>> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> >>> fd98e8b51d..f9df5f69ef 100644
> >>> --- a/app/test-pmd/util.c
> >>> +++ b/app/test-pmd/util.c
> >>> @@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t
> queue,
> >> struct rte_mbuf *pkts[],
> >>>    		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
> >>>    				 print_buf, buf_size, &cur_len);
> >>>    		MKDUMPSTR(print_buf, buf_size, cur_len,
> >>> -			  " - type=0x%04x - length=%u - nb_segs=%d",
> >>> -			  eth_type, (unsigned int) mb->pkt_len,
> >>> +			  " - pool=%s - type=0x%04x - length=%u -
> >> nb_segs=%d",
> >>> +			  mb->pool->name, eth_type, (unsigned int) mb-
> >>> pkt_len,
> >>>    			  (int)mb->nb_segs);
> >>>    		ol_flags = mb->ol_flags;
> >>>    		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {


^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v12 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-10-25  1:40                 ` [PATCH v11 " Hanumanth Pothula
  2022-11-01 14:13                   ` Hanumanth Reddy Pothula
  2022-11-03 12:15                   ` Singh, Aman Deep
@ 2022-11-07  5:31                   ` Hanumanth Pothula
  2022-11-09  8:04                     ` Singh, Aman Deep
  2022-11-10  8:17                     ` [PATCH v13 " Hanumanth Pothula
  2 siblings, 2 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-11-07  5:31 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The pool sort capability allows the PMD/NIC to choose a
memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
v12:
 - Process the multi-segment configuration when the number of
   segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
   offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
v11:
 - Resolve compilation errors and warnings.
v10:
 - Populate multi-mempool array based on mbuf_data_size_n instead
   of rx_pkt_nb_segs.
---
 app/test-pmd/testpmd.c | 61 +++++++++++++++++++++++++++---------------
 app/test-pmd/testpmd.h |  3 +++
 app/test-pmd/util.c    |  4 +--
 3 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 5b0f0838dc..cb3b6be8db 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2647,11 +2647,19 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
+	struct rte_mempool *mpx;
 	unsigned int i, mp_n;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	/* Verify Rx queue configuration is single pool and segment or
+	 * multiple pool/segment.
+	 * @see rte_eth_rxconf::rx_mempools
+	 * @see rte_eth_rxconf::rx_seg
+	 */
+	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
+	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
+		/* Single pool/segment configuration */
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2659,33 +2667,42 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 					     rx_conf, mp);
 		goto exit;
 	}
-	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
-		struct rte_mempool *mpx;
-		/*
-		 * Use last valid pool for the segments with number
-		 * exceeding the pool index.
-		 */
-		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
-		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
-		} else {
+	if (rx_pkt_nb_segs > 1 ||
+	    rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		/* multi-segment configuration */
+		for (i = 0; i < rx_pkt_nb_segs; i++) {
+			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+			/*
+			 * Use last valid pool for the segments with number
+			 * exceeding the pool index.
+			 */
+			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
+			mpx = mbuf_pool_find(socket_id, mp_n);
+			/* Handle zero as mbuf data buffer size. */
 			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+					 rx_pkt_seg_lengths[i] :
+					 mbuf_data_size[mp_n];
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					 rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+		}
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		/* multi-pool configuration */
+		for (i = 0; i < mbuf_data_size_n; i++) {
+			mpx = mbuf_pool_find(socket_id, i);
+			rx_mempool[i] = mpx ? mpx : mp;
 		}
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = mbuf_data_size_n;
 	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
 	rx_conf->rx_nseg = 0;
+	rx_conf->rx_mempools = NULL;
+	rx_conf->rx_nmempool = 0;
 exit:
 	ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ?
 						RTE_ETH_QUEUE_STATE_STOPPED :
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e65be323b8..14be10dcef 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread
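As a usage note: with this version, the multi-pool path is reached
simply by starting testpmd with more than one buffer size (for
example, something like --mbuf-size=2048,4096,8192, so that
mbuf_data_size_n ends up greater than 1; the exact option format
should be checked against the testpmd docs), while configurations
that request segments or the buffer split offload keep taking the
rx_seg/rx_nseg path.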

* Re: [PATCH v12 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-07  5:31                   ` [PATCH v12 " Hanumanth Pothula
@ 2022-11-09  8:04                     ` Singh, Aman Deep
  2022-11-09 10:39                       ` Andrew Rybchenko
  2022-11-10  8:17                     ` [PATCH v13 " Hanumanth Pothula
  1 sibling, 1 reply; 75+ messages in thread
From: Singh, Aman Deep @ 2022-11-09  8:04 UTC (permalink / raw)
  To: Hanumanth Pothula, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram



On 11/7/2022 11:01 AM, Hanumanth Pothula wrote:
> Some HW supports choosing a memory pool based on the packet's
> size. The pool sort capability allows the PMD/NIC to choose a
> memory pool based on the packet's length.
>
> When multiple mempool support is enabled, populate the mempool
> array accordingly. Also, print the name of the pool on which each
> packet is received.
>
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

Acked-by: Aman Singh <aman.deep.singh@intel.com>

<snip>


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v12 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-09  8:04                     ` Singh, Aman Deep
@ 2022-11-09 10:39                       ` Andrew Rybchenko
  2022-11-10  6:51                         ` Andrew Rybchenko
  0 siblings, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-11-09 10:39 UTC (permalink / raw)
  To: Singh, Aman Deep, Hanumanth Pothula, Yuying Zhang
  Cc: dev, thomas, jerinj, ndabilpuram

On 11/9/22 11:04, Singh, Aman Deep wrote:
> On 11/7/2022 11:01 AM, Hanumanth Pothula wrote:
>> Some HW supports choosing a memory pool based on the packet's
>> size. The pool sort capability allows the PMD/NIC to choose a
>> memory pool based on the packet's length.
>>
>> When multiple mempool support is enabled, populate the mempool
>> array accordingly. Also, print the name of the pool on which each
>> packet is received.
>>
>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 
> Acked-by: Aman Singh <aman.deep.singh@intel.com>

Applied to dpdk-next-net/main, thanks.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v12 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-09 10:39                       ` Andrew Rybchenko
@ 2022-11-10  6:51                         ` Andrew Rybchenko
  0 siblings, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-11-10  6:51 UTC (permalink / raw)
  To: Singh, Aman Deep, Hanumanth Pothula, Yuying Zhang
  Cc: dev, thomas, jerinj, ndabilpuram

On 11/9/22 13:39, Andrew Rybchenko wrote:
> On 11/9/22 11:04, Singh, Aman Deep wrote:
>> On 11/7/2022 11:01 AM, Hanumanth Pothula wrote:
>>> Some HW supports choosing a memory pool based on the packet's
>>> size. The pool sort capability allows the PMD/NIC to choose a
>>> memory pool based on the packet's length.
>>>
>>> When multiple mempool support is enabled, populate the mempool
>>> array accordingly. Also, print the name of the pool on which each
>>> packet is received.
>>>
>>> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
>>
>> Acked-by: Aman Singh <aman.deep.singh@intel.com>
> 
> Applied to dpdk-next-net/main, thanks.
> 

I'm sorry, I've removed the patch from next-net/main since it
breaks protocol-based header split as far as I can see.

^ permalink raw reply	[flat|nested] 75+ messages in thread
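The breakage referred to here: v12 dropped the rx_pkt_hdr_protos
branch, so protocol-delimited split segments lost their proto_hdr
assignment. A sketch of the per-segment rule v13 restores below
(helper name assumed, not part of the patch):

#include <stdint.h>
#include <rte_ethdev.h>

/* Illustrative helper: a split segment is protocol-delimited when a
 * header protocol is given and no explicit length is set; otherwise
 * it is length-delimited. */
static void
fill_split_seg(struct rte_eth_rxseg_split *seg, uint32_t proto_hdr,
	       uint16_t seg_len, uint16_t fallback_len)
{
	if (proto_hdr != 0 && seg_len == 0)
		seg->proto_hdr = proto_hdr;	/* e.g. RTE_PTYPE_L4_UDP */
	else
		seg->length = seg_len ? seg_len : fallback_len;
}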

* [PATCH v13 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-07  5:31                   ` [PATCH v12 " Hanumanth Pothula
  2022-11-09  8:04                     ` Singh, Aman Deep
@ 2022-11-10  8:17                     ` Hanumanth Pothula
  2022-11-10  9:01                       ` Andrew Rybchenko
  2022-11-10 10:16                       ` [PATCH v14 " Hanumanth Pothula
  1 sibling, 2 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-11-10  8:17 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The pool sort capability allows the PMD/NIC to choose a
memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v13:
 - Make sure the protocol-based header split feature is not broken
   by rebasing the changes on the latest code base.
v12:
 - Process the multi-segment configuration when the number of
   segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
   offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
v11:
 - Resolve compilation errors and warnings.
v10:
 - Populate multi-mempool array based on mbuf_data_size_n instead
   of rx_pkt_nb_segs.
---
 app/test-pmd/testpmd.c | 65 ++++++++++++++++++++++++++++--------------
 app/test-pmd/testpmd.h |  3 ++
 app/test-pmd/util.c    |  4 +--
 3 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 5b0f0838dc..78ea19fcbb 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2647,11 +2647,19 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
+	struct rte_mempool *mpx;
 	unsigned int i, mp_n;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	/* Verify Rx queue configuration is single pool and segment or
+	 * multiple pool/segment.
+	 * @see rte_eth_rxconf::rx_mempools
+	 * @see rte_eth_rxconf::rx_seg
+	 */
+	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
+	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
+		/* Single pool/segment configuration */
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2659,33 +2667,46 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 					     rx_conf, mp);
 		goto exit;
 	}
-	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
-		struct rte_mempool *mpx;
-		/*
-		 * Use last valid pool for the segments with number
-		 * exceeding the pool index.
-		 */
-		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
-		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
-		} else {
-			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+
+	if (rx_pkt_nb_segs > 1 ||
+	    rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		for (i = 0; i < rx_pkt_nb_segs; i++) {
+			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+			/*
+			 * Use last valid pool for the segments with number
+			 * exceeding the pool index.
+			 */
+			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
+			mpx = mbuf_pool_find(socket_id, mp_n);
+			/* Handle zero as mbuf data buffer size. */
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
+				rx_seg->proto_hdr = rx_pkt_hdr_protos[i];
+			} else {
+				rx_seg->length = rx_pkt_seg_lengths[i] ?
+						rx_pkt_seg_lengths[i] :
+						mbuf_data_size[mp_n];
+			}
 		}
-	}
 	rx_conf->rx_nseg = rx_pkt_nb_segs;
 	rx_conf->rx_seg = rx_useg;
+	} else {
+		/* multi-pool configuration */
+		for (i = 0; i < mbuf_data_size_n; i++) {
+			mpx = mbuf_pool_find(socket_id, i);
+			rx_mempool[i] = mpx ? mpx : mp;
+		}
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = mbuf_data_size_n;
+	}
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
 	rx_conf->rx_nseg = 0;
+	rx_conf->rx_mempools = NULL;
+	rx_conf->rx_nmempool = 0;
 exit:
 	ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ?
 						RTE_ETH_QUEUE_STATE_STOPPED :
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e65be323b8..14be10dcef 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 75+ messages in thread
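Worked example of the pool-index clamp in the split loop above
(mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i): with two
pools and three segments, segment 0 maps to pool 0 while segments 1
and 2 both map to pool 1, i.e. the last valid pool absorbs any extra
segments.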

* Re: [PATCH v13 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-10  8:17                     ` [PATCH v13 " Hanumanth Pothula
@ 2022-11-10  9:01                       ` Andrew Rybchenko
  2022-11-10  9:31                         ` [EXT] " Hanumanth Reddy Pothula
  2022-11-10 10:16                       ` [PATCH v14 " Hanumanth Pothula
  1 sibling, 1 reply; 75+ messages in thread
From: Andrew Rybchenko @ 2022-11-10  9:01 UTC (permalink / raw)
  To: Hanumanth Pothula, Aman Singh, Yuying Zhang
  Cc: dev, thomas, jerinj, ndabilpuram

On 11/10/22 11:17, Hanumanth Pothula wrote:
> Some HW supports choosing a memory pool based on the packet's
> size. The pool sort capability allows the PMD/NIC to choose a
> memory pool based on the packet's length.
> 
> When multiple mempool support is enabled, populate the mempool
> array accordingly. Also, print the name of the pool on which each
> packet is received.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 
> v13:
>   - Make sure the protocol-based header split feature is not broken
>     by rebasing the changes on the latest code base.
> v12:
>   - Process the multi-segment configuration when the number of
>     segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
>     offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
> v11:
>   - Resolve compilation errors and warnings.
> v10:
>   - Populate multi-mempool array based on mbuf_data_size_n instead
>     of rx_pkt_nb_segs.

I'm sorry for the inconvenience, could you rebase the patch on
the current next-net/main, please? I've decided to apply the
protocol-based buffer split fix first. Of course, I can rebase it
myself, but I want the result to be checked very carefully and
tested properly. Thanks.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [EXT] Re: [PATCH v13 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-10  9:01                       ` Andrew Rybchenko
@ 2022-11-10  9:31                         ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-11-10  9:31 UTC (permalink / raw)
  To: Andrew Rybchenko, Aman Singh, Yuying Zhang
  Cc: dev, thomas, Jerin Jacob Kollanukkaran, Nithin Kumar Dabilpuram



> -----Original Message-----
> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Sent: Thursday, November 10, 2022 2:31 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Aman Singh
> <aman.deep.singh@intel.com>; Yuying Zhang <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; Jerin Jacob Kollanukkaran
> <jerinj@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>
> Subject: [EXT] Re: [PATCH v13 1/1] app/testpmd: support multiple mbuf
> pools per Rx queue
> 
> External Email
> 
> ----------------------------------------------------------------------
> On 11/10/22 11:17, Hanumanth Pothula wrote:
> > Some HW supports choosing a memory pool based on the packet's
> > size. The pool sort capability allows the PMD/NIC to choose a
> > memory pool based on the packet's length.
> >
> > When multiple mempool support is enabled, populate the mempool
> > array accordingly. Also, print the name of the pool on which each
> > packet is received.
> >
> > Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >
> > v13:
> >   - Make sure the protocol-based header split feature is not broken
> >     by rebasing the changes on the latest code base.
> > v12:
> >   - Process the multi-segment configuration when the number of
> >     segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
> >     offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
> > v11:
> >   - Resolve compilation errors and warnings.
> > v10:
> >   - Populate the multi-mempool array based on mbuf_data_size_n
> >     instead of rx_pkt_nb_segs.
> 
> I'm sorry for the inconvenience, but could you rebase the patch on the
> current next-net/main, please? I've decided to apply the protocol-based
> buffer split fix first. Of course, I could rebase it myself, but I want the
> result to be checked very carefully and tested properly. Thanks.
Sure, will do that.

^ permalink raw reply	[flat|nested] 75+ messages in thread

* [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-10  8:17                     ` [PATCH v13 " Hanumanth Pothula
  2022-11-10  9:01                       ` Andrew Rybchenko
@ 2022-11-10 10:16                       ` Hanumanth Pothula
  2022-11-10 10:47                         ` Andrew Rybchenko
  2022-11-17  8:43                         ` Jiang, YuX
  1 sibling, 2 replies; 75+ messages in thread
From: Hanumanth Pothula @ 2022-11-10 10:16 UTC (permalink / raw)
  To: Aman Singh, Yuying Zhang
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram, hpothula

Some HW supports choosing a memory pool based on the packet's
size. The pool sort capability allows the PMD/NIC to choose a
memory pool based on the packet's length.

When multiple mempool support is enabled, populate the mempool
array accordingly. Also, print the name of the pool on which each
packet is received.

Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>

v14:
 - Rebased on tip of next-net/main
v13:
 - Make sure the protocol-based header split feature is not broken
   by rebasing the changes on the latest code base.
v12:
 - Process the multi-segment configuration when the number of
   segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
   offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
v11:
 - Resolve compilation errors and warnings.
v10:
 - Populate the multi-mempool array based on mbuf_data_size_n
   instead of rx_pkt_nb_segs.
---
 app/test-pmd/testpmd.c | 70 +++++++++++++++++++++++++++---------------
 app/test-pmd/testpmd.h |  3 ++
 app/test-pmd/util.c    |  4 +--
 3 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d494870e59..ef281ccd20 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2653,12 +2653,20 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
 {
 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
+	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
+	struct rte_mempool *mpx;
 	unsigned int i, mp_n;
 	uint32_t prev_hdrs = 0;
 	int ret;
 
-	if (rx_pkt_nb_segs <= 1 ||
-	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
+	/* Verify whether the Rx queue configuration is single pool and
+	 * segment or multiple pools/segments.
+	 * @see rte_eth_rxconf::rx_mempools
+	 * @see rte_eth_rxconf::rx_seg
+	 */
+	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
+	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
+		/* Single pool/segment configuration */
 		rx_conf->rx_seg = NULL;
 		rx_conf->rx_nseg = 0;
 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
@@ -2666,34 +2674,48 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 					     rx_conf, mp);
 		goto exit;
 	}
-	for (i = 0; i < rx_pkt_nb_segs; i++) {
-		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
-		struct rte_mempool *mpx;
-		/*
-		 * Use last valid pool for the segments with number
-		 * exceeding the pool index.
-		 */
-		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
-		mpx = mbuf_pool_find(socket_id, mp_n);
-		/* Handle zero as mbuf data buffer size. */
-		rx_seg->offset = i < rx_pkt_nb_offs ?
-				   rx_pkt_seg_offsets[i] : 0;
-		rx_seg->mp = mpx ? mpx : mp;
-		if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
-			rx_seg->proto_hdr = rx_pkt_hdr_protos[i] & ~prev_hdrs;
-			prev_hdrs |= rx_seg->proto_hdr;
-		} else {
-			rx_seg->length = rx_pkt_seg_lengths[i] ?
-					rx_pkt_seg_lengths[i] :
-					mbuf_data_size[mp_n];
+
+	if (rx_pkt_nb_segs > 1 ||
+	    rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+		/* multi-segment configuration */
+		for (i = 0; i < rx_pkt_nb_segs; i++) {
+			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
+			/*
+			 * Use last valid pool for the segments with number
+			 * exceeding the pool index.
+			 */
+			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
+			mpx = mbuf_pool_find(socket_id, mp_n);
+			/* Handle zero as mbuf data buffer size. */
+			rx_seg->offset = i < rx_pkt_nb_offs ?
+					   rx_pkt_seg_offsets[i] : 0;
+			rx_seg->mp = mpx ? mpx : mp;
+			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
+				rx_seg->proto_hdr = rx_pkt_hdr_protos[i] & ~prev_hdrs;
+				prev_hdrs |= rx_seg->proto_hdr;
+			} else {
+				rx_seg->length = rx_pkt_seg_lengths[i] ?
+						rx_pkt_seg_lengths[i] :
+						mbuf_data_size[mp_n];
+			}
+		}
+		rx_conf->rx_nseg = rx_pkt_nb_segs;
+		rx_conf->rx_seg = rx_useg;
+	} else {
+		/* multi-pool configuration */
+		for (i = 0; i < mbuf_data_size_n; i++) {
+			mpx = mbuf_pool_find(socket_id, i);
+			rx_mempool[i] = mpx ? mpx : mp;
 		}
+		rx_conf->rx_mempools = rx_mempool;
+		rx_conf->rx_nmempool = mbuf_data_size_n;
 	}
-	rx_conf->rx_nseg = rx_pkt_nb_segs;
-	rx_conf->rx_seg = rx_useg;
 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
 				    socket_id, rx_conf, NULL);
 	rx_conf->rx_seg = NULL;
 	rx_conf->rx_nseg = 0;
+	rx_conf->rx_mempools = NULL;
+	rx_conf->rx_nmempool = 0;
 exit:
 	ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ?
 						RTE_ETH_QUEUE_STATE_STOPPED :
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 6aa85e74ee..05ca8628cf 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -80,6 +80,9 @@ extern uint8_t cl_quit;
 
 #define MIN_TOTAL_NUM_MBUFS 1024
 
+/* Maximum number of pools supported per Rx queue */
+#define MAX_MEMPOOL 8
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
index fd98e8b51d..f9df5f69ef 100644
--- a/app/test-pmd/util.c
+++ b/app/test-pmd/util.c
@@ -150,8 +150,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 		print_ether_addr(" - dst=", &eth_hdr->dst_addr,
 				 print_buf, buf_size, &cur_len);
 		MKDUMPSTR(print_buf, buf_size, cur_len,
-			  " - type=0x%04x - length=%u - nb_segs=%d",
-			  eth_type, (unsigned int) mb->pkt_len,
+			  " - pool=%s - type=0x%04x - length=%u - nb_segs=%d",
+			  mb->pool->name, eth_type, (unsigned int) mb->pkt_len,
 			  (int)mb->nb_segs);
 		ol_flags = mb->ol_flags;
 		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
-- 
2.25.1
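
For reference, application-side usage of the new per-queue mempool
fields looks roughly like the sketch below. This is an illustrative
sketch only, not part of the patch: the pool names, sizes, ring and
cache counts, descriptor count, and the helper name
setup_multi_pool_rxq() are assumptions made for the example. It relies
only on rte_eth_rxconf::rx_mempools, rte_eth_rxconf::rx_nmempool and
rte_eth_dev_info::max_rx_mempools from the ethdev series this patch
builds on.

#include <errno.h>
#include <string.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
setup_multi_pool_rxq(uint16_t port_id, uint16_t queue_id, int socket_id)
{
	struct rte_mempool *pools[2];
	struct rte_eth_rxconf rx_conf;
	struct rte_eth_dev_info dev_info;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		return ret;
	/* 0 means the driver has no support for per-queue mempool arrays. */
	if (dev_info.max_rx_mempools < 2)
		return -ENOTSUP;

	/* One pool with 2K data buffers for short packets and one with
	 * 4K data buffers for larger packets.
	 */
	pools[0] = rte_pktmbuf_pool_create("rx_pool_2k", 8192, 256, 0,
					   2048 + RTE_PKTMBUF_HEADROOM,
					   socket_id);
	pools[1] = rte_pktmbuf_pool_create("rx_pool_4k", 4096, 256, 0,
					   4096 + RTE_PKTMBUF_HEADROOM,
					   socket_id);
	if (pools[0] == NULL || pools[1] == NULL)
		return -ENOMEM;

	memset(&rx_conf, 0, sizeof(rx_conf));
	rx_conf.rx_mempools = pools;	/* per-queue mempool array */
	rx_conf.rx_nmempool = 2;	/* number of entries in the array */

	/* The mp argument is NULL: buffers come from rx_mempools instead. */
	return rte_eth_rx_queue_setup(port_id, queue_id, 512, socket_id,
				      &rx_conf, NULL);
}

In testpmd itself, the equivalent multi-pool path is taken when more
than one buffer size is supplied via --mbuf-size (for example
--mbuf-size=2048,4096) and neither multiple segments nor the buffer
split offload are configured.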


^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-10 10:16                       ` [PATCH v14 " Hanumanth Pothula
@ 2022-11-10 10:47                         ` Andrew Rybchenko
  2022-11-17  8:43                         ` Jiang, YuX
  1 sibling, 0 replies; 75+ messages in thread
From: Andrew Rybchenko @ 2022-11-10 10:47 UTC (permalink / raw)
  To: Hanumanth Pothula, Aman Singh, Yuying Zhang
  Cc: dev, thomas, jerinj, ndabilpuram

On 11/10/22 13:16, Hanumanth Pothula wrote:
> Some HW supports choosing a memory pool based on the packet's
> size. The pool sort capability allows the PMD/NIC to choose a
> memory pool based on the packet's length.
> 
> When multiple mempool support is enabled, populate the mempool
> array accordingly. Also, print the name of the pool on which each
> packet is received.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 
> v14:
>   - Rebased on tip of next-net/main
> v13:
>   - Make sure the protocol-based header split feature is not broken
>     by rebasing the changes on the latest code base.
> v12:
>   - Process the multi-segment configuration when the number of
>     segments (rx_pkt_nb_segs) is greater than 1 or the buffer split
>     offload flag (RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) is set.
> v11:
>   - Resolve compilation errors and warnings.
> v10:
>   - Populate the multi-mempool array based on mbuf_data_size_n
>     instead of rx_pkt_nb_segs.

Reviewed-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>

Applied to dpdk-next-net/main, thanks.


^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-10 10:16                       ` [PATCH v14 " Hanumanth Pothula
  2022-11-10 10:47                         ` Andrew Rybchenko
@ 2022-11-17  8:43                         ` Jiang, YuX
  2022-11-17 11:38                           ` Hanumanth Reddy Pothula
  1 sibling, 1 reply; 75+ messages in thread
From: Jiang, YuX @ 2022-11-17  8:43 UTC (permalink / raw)
  To: Hanumanth Pothula, Singh, Aman Deep, Zhang, Yuying
  Cc: dev, andrew.rybchenko, thomas, jerinj, ndabilpuram

Hi Hanumanth,

We are seeing an issue with this patch; could you please take a look quickly?
https://bugs.dpdk.org/show_bug.cgi?id=1128

Best regards,
Yu Jiang

> -----Original Message-----
> From: Hanumanth Pothula <hpothula@marvell.com>
> Sent: Thursday, November 10, 2022 6:17 PM
> To: Singh, Aman Deep <aman.deep.singh@intel.com>; Zhang, Yuying
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru; thomas@monjalon.net;
> jerinj@marvell.com; ndabilpuram@marvell.com; hpothula@marvell.com
> Subject: [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per Rx
> queue
> 
> Some HW supports choosing a memory pool based on the packet's size. The
> pool sort capability allows the PMD/NIC to choose a memory pool based on
> the packet's length.
> 
> When multiple mempool support is enabled, populate the mempool array
> accordingly. Also, print the name of the pool on which each packet is
> received.
> 
> Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> 

^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per Rx queue
  2022-11-17  8:43                         ` Jiang, YuX
@ 2022-11-17 11:38                           ` Hanumanth Reddy Pothula
  0 siblings, 0 replies; 75+ messages in thread
From: Hanumanth Reddy Pothula @ 2022-11-17 11:38 UTC (permalink / raw)
  To: Jiang, YuX, Singh, Aman Deep, Zhang, Yuying
  Cc: dev, andrew.rybchenko, thomas, Jerin Jacob Kollanukkaran,
	Nithin Kumar Dabilpuram

Hi Yu Jiang,

Please find the fix for the below issue:
https://patches.dpdk.org/project/dpdk/patch/20221117113047.3088461-1-hpothula@marvell.com

Verified the changes locally, both with and without multi-mempool support.
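
A quick way to do the same check from an application's Rx loop,
mirroring what the util.c hunk prints in testpmd's verbose mode, is
sketched below; port_id, queue_id and the burst size are assumed
context, not part of the patch:

	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	/* Pool selection can be observed per packet: short packets should
	 * come from the small-buffer pool, larger ones from the big pool.
	 */
	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++)
		printf("len=%u pool=%s\n", pkts[i]->pkt_len,
		       pkts[i]->pool->name);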

Regards,
Hanumanth

> -----Original Message-----
> From: Jiang, YuX <yux.jiang@intel.com>
> Sent: Thursday, November 17, 2022 2:13 PM
> To: Hanumanth Reddy Pothula <hpothula@marvell.com>; Singh, Aman
> Deep <aman.deep.singh@intel.com>; Zhang, Yuying
> <yuying.zhang@intel.com>
> Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru;
> thomas@monjalon.net; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
> Subject: [EXT] RE: [PATCH v14 1/1] app/testpmd: support multiple mbuf
> pools per Rx queue
> 
> External Email
> 
> ----------------------------------------------------------------------
> Hi Hanumanth,
> 
> We are seeing an issue with this patch; could you please take a look
> quickly?
> https://bugs.dpdk.org/show_bug.cgi?id=1128
> 
> Best regards,
> Yu Jiang
> 
> > -----Original Message-----
> > From: Hanumanth Pothula <hpothula@marvell.com>
> > Sent: Thursday, November 10, 2022 6:17 PM
> > To: Singh, Aman Deep <aman.deep.singh@intel.com>; Zhang, Yuying
> > <yuying.zhang@intel.com>
> > Cc: dev@dpdk.org; andrew.rybchenko@oktetlabs.ru;
> thomas@monjalon.net;
> > jerinj@marvell.com; ndabilpuram@marvell.com; hpothula@marvell.com
> > Subject: [PATCH v14 1/1] app/testpmd: support multiple mbuf pools per
> > Rx queue
> >
> > Some HW supports choosing a memory pool based on the packet's
> > size. The pool sort capability allows the PMD/NIC to choose a
> > memory pool based on the packet's length.
> >
> > When multiple mempool support is enabled, populate the mempool
> > array accordingly. Also, print the name of the pool on which each
> > packet is received.
> >
> > Signed-off-by: Hanumanth Pothula <hpothula@marvell.com>
> >

^ permalink raw reply	[flat|nested] 75+ messages in thread

end of thread, other threads:[~2022-11-17 11:38 UTC | newest]

Thread overview: 75+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-12 10:46 [PATCH v1 1/1] ethdev: introduce pool sort capability Hanumanth Pothula
2022-08-12 13:27 ` Morten Brørup
2022-08-12 17:24 ` [PATCH v2 1/3] " Hanumanth Pothula
2022-08-12 17:24   ` [PATCH v2 2/3] app/testpmd: add command line argument 'rxseg-mode' Hanumanth Pothula
2022-08-12 17:24   ` [PATCH v2 3/3] net/cnxk: introduce pool sort capability Hanumanth Pothula
2022-08-23  3:26   ` [PATCH v2 1/3] ethdev: " Ding, Xuan
2022-08-24 15:33     ` Ferruh Yigit
2022-08-30 12:08       ` [EXT] " Hanumanth Reddy Pothula
2022-09-06 12:18         ` Ferruh Yigit
2022-09-07  7:02           ` Hanumanth Reddy Pothula
2022-09-07 11:24             ` Ferruh Yigit
2022-09-07 21:31               ` Hanumanth Reddy Pothula
2022-09-13  9:28                 ` Ferruh Yigit
2022-09-13 10:00                   ` Hanumanth Reddy Pothula
2022-09-02  7:00   ` [PATCH v3 " Hanumanth Pothula
2022-09-02  7:00     ` [PATCH v3 2/3] app/testpmd: Add support for " Hanumanth Pothula
2022-09-02  7:00     ` [PATCH v3 3/3] net/cnxk: introduce " Hanumanth Pothula
2022-09-13  8:06     ` [PATCH v3 1/3] ethdev: " Andrew Rybchenko
2022-09-13  9:31       ` Ferruh Yigit
2022-09-13 10:41         ` [EXT] " Hanumanth Reddy Pothula
2022-09-15  7:07     ` [PATCH v4 1/3] ethdev: Add support for mulitiple mbuf pools per Rx queue Hanumanth Pothula
2022-09-15  7:07       ` [PATCH v4 2/3] app/testpmd: " Hanumanth Pothula
2022-09-15  7:07       ` [PATCH v4 3/3] net/cnxk: Add support for mulitiple mbuf pools Hanumanth Pothula
2022-09-28  9:43       ` [PATCH v4 1/3] ethdev: Add support for mulitiple mbuf pools per Rx queue Andrew Rybchenko
2022-09-28 11:06         ` Thomas Monjalon
2022-10-06 17:01       ` [PATCH v5 1/3] ethdev: support " Hanumanth Pothula
2022-10-06 17:01         ` [PATCH v5 2/3] net/cnxk: " Hanumanth Pothula
2022-10-06 17:01         ` [PATCH v5 3/3] app/testpmd: " Hanumanth Pothula
2022-10-06 17:29         ` [PATCH v5 1/3] ethdev: " Stephen Hemminger
2022-10-07 14:13           ` Andrew Rybchenko
2022-10-06 17:53         ` [PATCH v6 " Hanumanth Pothula
2022-10-06 17:53           ` [PATCH v6 2/3] net/cnxk: " Hanumanth Pothula
2022-10-06 17:53           ` [PATCH v6 3/3] app/testpmd: " Hanumanth Pothula
2022-10-06 18:14           ` [PATCH v6 1/3] ethdev: " Hanumanth Reddy Pothula
2022-10-07 14:37         ` [PATCH v7 0/4] " Andrew Rybchenko
2022-10-07 14:37           ` [PATCH v7 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
2022-10-07 14:37           ` [PATCH v7 2/4] ethdev: support mulitiple mbuf pools per Rx queue Andrew Rybchenko
2022-10-07 16:08             ` Thomas Monjalon
2022-10-07 16:18               ` Stephen Hemminger
2022-10-07 16:20                 ` Stephen Hemminger
2022-10-07 16:33                   ` Andrew Rybchenko
2022-10-07 17:30               ` Andrew Rybchenko
2022-10-07 14:37           ` [PATCH v7 3/4] net/cnxk: " Andrew Rybchenko
2022-10-07 14:37           ` [PATCH v7 4/4] app/testpmd: " Andrew Rybchenko
2022-10-07 17:29         ` [PATCH v8 0/4] ethdev: " Andrew Rybchenko
2022-10-07 17:29           ` [PATCH v8 1/4] ethdev: factor out helper function to check Rx mempool Andrew Rybchenko
2022-10-07 17:29           ` [PATCH v8 2/4] ethdev: support multiple mbuf pools per Rx queue Andrew Rybchenko
2022-10-07 18:35             ` Thomas Monjalon
2022-10-07 19:45               ` Andrew Rybchenko
2022-10-07 17:29           ` [PATCH v8 3/4] net/cnxk: support mulitiple " Andrew Rybchenko
2022-10-07 17:29           ` [PATCH v8 4/4] app/testpmd: " Andrew Rybchenko
     [not found]             ` <PH0PR18MB47500560DC1793F68E7312DDCB5F9@PH0PR18MB4750.namprd18.prod.outlook.com>
2022-10-07 19:43               ` [EXT] " Andrew Rybchenko
2022-10-07 19:56                 ` Hanumanth Reddy Pothula
2022-10-17  8:48             ` [PATCH v9 1/1] " Hanumanth Pothula
2022-10-21 15:57               ` Singh, Aman Deep
2022-10-24  3:32                 ` [EXT] " Hanumanth Reddy Pothula
2022-10-24  4:07               ` [PATCH v10 1/1] app/testpmd: support multiple " Hanumanth Pothula
2022-10-25  1:40                 ` [PATCH v11 " Hanumanth Pothula
2022-11-01 14:13                   ` Hanumanth Reddy Pothula
2022-11-03 12:15                   ` Singh, Aman Deep
2022-11-03 12:36                     ` [EXT] " Hanumanth Reddy Pothula
2022-11-03 15:20                       ` Singh, Aman Deep
2022-11-04 15:38                         ` Hanumanth Reddy Pothula
2022-11-07  5:31                   ` [PATCH v12 " Hanumanth Pothula
2022-11-09  8:04                     ` Singh, Aman Deep
2022-11-09 10:39                       ` Andrew Rybchenko
2022-11-10  6:51                         ` Andrew Rybchenko
2022-11-10  8:17                     ` [PATCH v13 " Hanumanth Pothula
2022-11-10  9:01                       ` Andrew Rybchenko
2022-11-10  9:31                         ` [EXT] " Hanumanth Reddy Pothula
2022-11-10 10:16                       ` [PATCH v14 " Hanumanth Pothula
2022-11-10 10:47                         ` Andrew Rybchenko
2022-11-17  8:43                         ` Jiang, YuX
2022-11-17 11:38                           ` Hanumanth Reddy Pothula
2022-10-08 20:38           ` [PATCH v8 0/4] ethdev: support mulitiple " Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).