Thanks for the comments.
From: Константин Ананьев <konstantin.v.ananyev@yandex.ru>
Sent: Monday, June 5, 2023 8:54 PM
To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
Subject: Re: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode
Hi Feifei,
few more comments from me, see below.
Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
APIs to recycle used mbufs from a transmit queue of an Ethernet device,
and move these mbufs into a mbuf ring for a receive queue of an Ethernet
device. This can bypass mempool 'put/get' operations hence saving CPU
cycles.
For each recycling mbufs, the rte_eth_recycle_mbufs() function performs
the following operations:
- Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf
ring.
- Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
from the Tx mbuf ring.
Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
doc/guides/rel_notes/release_23_07.rst | 7 +
lib/ethdev/ethdev_driver.h | 10 ++
lib/ethdev/ethdev_private.c | 2 +
lib/ethdev/rte_ethdev.c | 31 +++++
lib/ethdev/rte_ethdev.h | 182 +++++++++++++++++++++++++
lib/ethdev/rte_ethdev_core.h | 15 +-
lib/ethdev/version.map | 4 +
7 files changed, 249 insertions(+), 2 deletions(-)
diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
index a9b1293689..f279036cb9 100644
--- a/doc/guides/rel_notes/release_23_07.rst
+++ b/doc/guides/rel_notes/release_23_07.rst
@@ -55,6 +55,13 @@ New Features
Also, make sure to start the actual text at the margin.
=======================================================
+* **Add mbufs recycling support. **
+ Added ``rte_eth_recycle_rx_queue_info_get`` and ``rte_eth_recycle_mbufs``
+ APIs which allow the user to copy used mbufs from the Tx mbuf ring
+ into the Rx mbuf ring. This feature supports the case that the Rx Ethernet
+ device is different from the Tx Ethernet device with respective driver
+ callback functions in ``rte_eth_recycle_mbufs``.
+
Removed Items
-------------
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 2c9d615fb5..c6723d5277 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -59,6 +59,10 @@ struct rte_eth_dev {
eth_rx_descriptor_status_t rx_descriptor_status;
/** Check the status of a Tx descriptor */
eth_tx_descriptor_status_t tx_descriptor_status;
+ /** Pointer to PMD transmit mbufs reuse function */
+ eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
+ /** Pointer to PMD receive descriptors refill function */
+ eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
/**
* Device data that is shared between primary and secondary processes
@@ -504,6 +508,10 @@ typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
+typedef void (*eth_recycle_rxq_info_get_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
uint16_t queue_id, struct rte_eth_burst_mode *mode);
@@ -1247,6 +1255,8 @@ struct eth_dev_ops {
eth_rxq_info_get_t rxq_info_get;
/** Retrieve Tx queue information */
eth_txq_info_get_t txq_info_get;
+ /** Retrieve mbufs recycle Rx queue information */
+ eth_recycle_rxq_info_get_t recycle_rxq_info_get;
eth_burst_mode_get_t rx_burst_mode_get; /**< Get Rx burst mode */
eth_burst_mode_get_t tx_burst_mode_get; /**< Get Tx burst mode */
eth_fw_version_get_t fw_version_get; /**< Get firmware version */
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 14ec8c6ccf..f8ab64f195 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -277,6 +277,8 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
fpo->rx_queue_count = dev->rx_queue_count;
fpo->rx_descriptor_status = dev->rx_descriptor_status;
fpo->tx_descriptor_status = dev->tx_descriptor_status;
+ fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
+ fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;
fpo->rxq.data = dev->data->rx_queues;
fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 4d03255683..7c27dcfea4 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5784,6 +5784,37 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
return 0;
}
+int
+rte_eth_recycle_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+ dev = &rte_eth_devices[port_id];
+
+ if (queue_id >= dev->data->nb_rx_queues) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
+ return -EINVAL;
+ }
+
+ if (dev->data->rx_queues == NULL ||
+ dev->data->rx_queues[queue_id] == NULL) {
+ RTE_ETHDEV_LOG(ERR,
+ "Rx queue %"PRIu16" of device with port_id=%"
+ PRIu16" has not been setup\n",
+ queue_id, port_id);
+ return -EINVAL;
+ }
+
+ if (*dev->dev_ops->recycle_rxq_info_get == NULL)
+ return -ENOTSUP;
+
+ dev->dev_ops->recycle_rxq_info_get(dev, queue_id, recycle_rxq_info);
+
+ return 0;
+}
+
int
rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_burst_mode *mode)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 99fe9e238b..7434aa2483 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1820,6 +1820,30 @@ struct rte_eth_txq_info {
uint8_t queue_state; /**< one of RTE_ETH_QUEUE_STATE_*. */
} __rte_cache_min_aligned;
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * Ethernet device Rx queue information structure for recycling mbufs.
+ * Used to retrieve Rx queue information when Tx queue reusing mbufs and moving
+ * them into Rx mbuf ring.
+ */
+struct rte_eth_recycle_rxq_info {
+ struct rte_mbuf **mbuf_ring; /**< mbuf ring of Rx queue. */
+ struct rte_mempool *mp; /**< mempool of Rx queue. */
+ uint16_t *refill_head; /**< head of Rx queue refilling mbufs. */
+ uint16_t *receive_tail; /**< tail of Rx queue receiving pkts. */
+ uint16_t mbuf_ring_size; /**< configured number of mbuf ring size. */
+ /**
+ * Requirement on mbuf refilling batch size of Rx mbuf ring.
+ * For some PMD drivers, the number of Rx mbuf ring refilling mbufs
+ * should be aligned with mbuf ring size, in order to simplify
+ * ring wrapping around.
+ * Value 0 means that PMD drivers have no requirement for this.
+ */
+ uint16_t refill_requirement;
+} __rte_cache_min_aligned;
+
/* Generic Burst mode flag definition, values can be ORed. */
/**
@@ -4809,6 +4833,31 @@ int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_txq_info *qinfo);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Retrieve information about given ports's Rx queue for recycling mbufs.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The Rx queue on the Ethernet devicefor which information
+ * will be retrieved.
+ * @param recycle_rxq_info
+ * A pointer to a structure of type *rte_eth_recycle_rxq_info* to be filled.
+ *
+ * @return
+ * - 0: Success
+ * - -ENODEV: If *port_id* is invalid.
+ * - -ENOTSUP: routine is not supported by the device PMD.
+ * - -EINVAL: The queue_id is out of range.
+ */
+__rte_experimental
+int rte_eth_recycle_rx_queue_info_get(uint16_t port_id,
+ uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
/**
* Retrieve information about the Rx packet burst mode.
*
@@ -6483,6 +6532,139 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
}
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Recycle used mbufs from a transmit queue of an Ethernet device, and move
+ * these mbufs into a mbuf ring for a receive queue of an Ethernet device.
+ * This can bypass mempool path to save CPU cycles.
+ *
+ * The rte_eth_recycle_mbufs() function loops, with rte_eth_rx_burst() and
+ * rte_eth_tx_burst() functions, freeing Tx used mbufs and replenishing Rx
+ * descriptors. The number of recycling mbufs depends on the request of Rx mbuf
+ * ring, with the constraint of enough used mbufs from Tx mbuf ring.
+ *
+ * For each recycling mbufs, the rte_eth_recycle_mbufs() function performs the
+ * following operations:
+ *
+ * - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf ring.
+ *
+ * - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
+ * from the Tx mbuf ring.
+ *
+ * This function spilts Rx and Tx path with different callback functions. The
+ * callback function recycle_tx_mbufs_reuse is for Tx driver. The callback
+ * function recycle_rx_descriptors_refill is for Rx driver. rte_eth_recycle_mbufs()
+ * can support the case that Rx Ethernet device is different from Tx Ethernet device.
+ *
+ * It is the responsibility of users to select the Rx/Tx queue pair to recycle
+ * mbufs. Before call this function, users must call rte_eth_recycle_rxq_info_get
+ * function to retrieve selected Rx queue information.
+ * @see rte_eth_recycle_rxq_info_get, struct rte_eth_recycle_rxq_info
+ *
+ * Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
+ * between the receive queue and transmit queue. Do not pair one receive queue with
+ * multiple transmit queues or pair one transmit queue with multiple receive queues,
+ * in order to avoid memory error rewriting.
Probably I am missing something, but why it is not possible to do something like that:
rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
....
rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);
I.E. feed rx queue from 2 tx queues?
Two problems for this:
decide which one to choose in the driver layer.
On the other hand, current mechanism can support users to switch 1 txq to another timely
in the application layer. If user want to choose another txq, he just need to change the txq_queue_id parameter
in the API.
avoid multi-thread conflict. Spinlock will decrease the data-path performance greatly. Thus, we do not consider
1 rxq mapping multiple txqs here.
+ *
+ * @param rx_port_id
+ * Port identifying the receive side.
+ * @param rx_queue_id
+ * The index of the receive queue identifying the receive side.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param tx_port_id
+ * Port identifying the transmit side.
+ * @param tx_queue_id
+ * The index of the transmit queue identifying the transmit side.
+ * The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param recycle_rxq_info
+ * A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
+ * the information of the Rx queue mbuf ring.
+ * @return
+ * The number of recycling mbufs.
+ */
+__rte_experimental
+static inline uint16_t
+rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
+ uint16_t tx_port_id, uint16_t tx_queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct rte_eth_fp_ops *p;
+ void *qd;
+ uint16_t nb_mbufs;
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+ if (tx_port_id >= RTE_MAX_ETHPORTS ||
+ tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid tx_port_id=%u or tx_queue_id=%u\n",
+ tx_port_id, tx_queue_id);
+ return 0;
+ }
+#endif
+
+ /* fetch pointer to queue data */
+ p = &rte_eth_fp_ops[tx_port_id];
+ qd = p->txq.data[tx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
+
+ if (qd == NULL) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
+ tx_queue_id, tx_port_id);
+ return 0;
+ }
+#endif
+ if (p->recycle_tx_mbufs_reuse == NULL)
+ return 0;
+
+ /* Copy used *rte_mbuf* buffer pointers from Tx mbuf ring
+ * into Rx mbuf ring.
+ */
+ nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
+
+ /* If no recycling mbufs, return 0. */
+ if (nb_mbufs == 0)
+ return 0;
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+ if (rx_port_id >= RTE_MAX_ETHPORTS ||
+ rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+ RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
+ rx_port_id, rx_queue_id);
+ return 0;
+ }
+#endif
+
+ /* fetch pointer to queue data */
+ p = &rte_eth_fp_ops[rx_port_id];
+ qd = p->rxq.data[rx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
+
+ if (qd == NULL) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
+ rx_queue_id, rx_port_id);
+ return 0;
+ }
+#endif
+
+ if (p->recycle_rx_descriptors_refill == NULL)
+ return 0;
+
+ /* Replenish the Rx descriptors with the recycling
+ * into Rx mbuf ring.
+ */
+ p->recycle_rx_descriptors_refill(qd, nb_mbufs);
+
+ return nb_mbufs;
+}
+
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index dcf8adab92..a2e6ea6b6c 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
/** @internal Check the status of a Tx descriptor */
typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
+/** @internal Copy used mbufs from Tx mbuf ring into Rx mbuf ring */
+typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
+/** @internal Refill Rx descriptors with the recycling mbufs */
+typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
+
/**
* @internal
* Structure used to hold opaque pointers to internal ethdev Rx/Tx
@@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
eth_rx_queue_count_t rx_queue_count;
/** Check the status of a Rx descriptor. */
eth_rx_descriptor_status_t rx_descriptor_status;
+ /** Refill Rx descriptors with the recycling mbufs. */
+ eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
I am afraid we can't put new fields here without ABI breakage.
Agree
It has to be below rxq.
Now thinking about current layout probably not the best one,
and when introducing this struct, I should probably put rxq either
on the top of the struct, or on the next cache line.
But such change is not possible right now anyway.
Same story for txq.
Thus we should rearrange the structure like below:
struct rte_eth_fp_ops {
struct rte_ethdev_qdata rxq;
eth_rx_burst_t rx_pkt_burst;
eth_rx_queue_count_t rx_queue_count;
eth_rx_descriptor_status_t rx_descriptor_status;
eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
uintptr_t reserved1[2];
}
/** Rx queues data. */
struct rte_ethdev_qdata rxq;
- uintptr_t reserved1[3];
+ uintptr_t reserved1[2];
/**@}*/
/**@{*/
@@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
eth_tx_prep_t tx_pkt_prepare;
/** Check the status of a Tx descriptor. */
eth_tx_descriptor_status_t tx_descriptor_status;
+ /** Copy used mbufs from Tx mbuf ring into Rx. */
+ eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
/** Tx queues data. */
struct rte_ethdev_qdata txq;
- uintptr_t reserved2[3];
+ uintptr_t reserved2[2];
/**@}*/
} __rte_cache_aligned;
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 357d1a88c0..45c417f6bd 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -299,6 +299,10 @@ EXPERIMENTAL {
rte_flow_action_handle_query_update;
rte_flow_async_action_handle_query_update;
rte_flow_async_create_by_index;
+
+ # added in 23.07
+ rte_eth_recycle_mbufs;
+ rte_eth_recycle_rx_queue_info_get;
};
INTERNAL {--
2.25.1