From: Bruce Richardson <bruce.richardson@intel.com>
To: Shaiq Wani <shaiq.wani@intel.com>
Cc: <dev@dpdk.org>, <aman.deep.singh@intel.com>
Subject: Re: [PATCH] net/intel: using common functions in idpf driver
Date: Wed, 12 Mar 2025 16:38:48 +0000
Message-ID: <Z9G4mKzUrPs2FRtC@bricha3-mobl1.ger.corp.intel.com>
In-Reply-To: <20250312155351.409879-1-shaiq.wani@intel.com>
On Wed, Mar 12, 2025 at 09:23:51PM +0530, Shaiq Wani wrote:
> reworked the drivers to use the common functions and structures
> from drivers/net/intel/common.
>
> Signed-off-by: Shaiq Wani <shaiq.wani@intel.com>
> ---
> drivers/net/intel/common/tx.h | 21 +++-
> drivers/net/intel/cpfl/cpfl_ethdev.c | 1 +
> drivers/net/intel/cpfl/cpfl_ethdev.h | 2 +-
> drivers/net/intel/cpfl/cpfl_rxtx.c | 66 +++++------
> drivers/net/intel/cpfl/cpfl_rxtx.h | 3 +-
> drivers/net/intel/cpfl/cpfl_rxtx_vec_common.h | 7 +-
> drivers/net/intel/idpf/idpf_common_rxtx.c | 108 ++++++++---------
> drivers/net/intel/idpf/idpf_common_rxtx.h | 65 ++--------
> .../net/intel/idpf/idpf_common_rxtx_avx2.c | 112 +++++-------------
> .../net/intel/idpf/idpf_common_rxtx_avx512.c | 104 ++++++++--------
> drivers/net/intel/idpf/idpf_common_virtchnl.c | 8 +-
> drivers/net/intel/idpf/idpf_common_virtchnl.h | 2 +-
> drivers/net/intel/idpf/idpf_ethdev.c | 3 +-
> drivers/net/intel/idpf/idpf_rxtx.c | 46 +++----
> drivers/net/intel/idpf/idpf_rxtx.h | 1 +
> drivers/net/intel/idpf/idpf_rxtx_vec_common.h | 17 ++-
> drivers/net/intel/idpf/meson.build | 2 +-
> 17 files changed, 248 insertions(+), 320 deletions(-)
>
Thanks for undertaking this work. Hopefully it can simplify and improve
our code. Some feedback from an initial review inline below.
Regards,
/Bruce
> diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
> index d9cf4474fc..532adb4fd1 100644
> --- a/drivers/net/intel/common/tx.h
> +++ b/drivers/net/intel/common/tx.h
> @@ -36,6 +36,7 @@ struct ci_tx_queue {
> volatile struct iavf_tx_desc *iavf_tx_ring;
> volatile struct ice_tx_desc *ice_tx_ring;
> volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
> + volatile struct idpf_base_tx_desc *idpf_tx_ring;
> };
Very minor nit: The entries listed in the union are in alphabetical order,
so let's put idpf just one line up.
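i.e.:

	volatile struct iavf_tx_desc *iavf_tx_ring;
	volatile struct ice_tx_desc *ice_tx_ring;
	volatile struct idpf_base_tx_desc *idpf_tx_ring;
	volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;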
> volatile uint8_t *qtx_tail; /* register address of tail */
> union {
> @@ -51,7 +52,7 @@ struct ci_tx_queue {
> uint16_t nb_tx_free;
> /* Start freeing TX buffers if there are less free descriptors than
> * this value.
> - */
> + */
> uint16_t tx_free_thresh;
> /* Number of TX descriptors to use before RS bit is set. */
> uint16_t tx_rs_thresh;
> @@ -98,6 +99,24 @@ struct ci_tx_queue {
> uint8_t wthresh; /**< Write-back threshold reg. */
> uint8_t using_ipsec; /**< indicates that IPsec TX feature is in use */
> };
> + struct { /* idpf specific values */
This struct is quite a bit bigger, I think, than the other structs in the
union. Hopefully there is some way to cut it down a bit. (ixgbe is the next
biggest, at 24 bytes; this one, by my count, is three times that, at 72
bytes.)
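For reference, my accounting of those 72 bytes: 8 (desc_ring/compl_ring
union) + 1 (q_started) + 7 (padding) + 8 (idpf_ops) + 2 (sw_nb_desc) +
2 (sw_tail) + 4 (padding) + 8 (txqs) + 4 (tx_start_qid) +
1 (expected_gen_id) + 3 (padding) + 8 (complq) + 16 (ctype) = 72.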
> + volatile union {
> + struct idpf_flex_tx_sched_desc *desc_ring;
> + struct idpf_splitq_tx_compl_desc *compl_ring;
> + };
> + bool q_started;
Do we really need this value? Other drivers seem to manage fine without a
special queue variable indicating started or not.
> + const struct idpf_txq_ops *idpf_ops;
> + /* only valid for split queue mode */
> + uint16_t sw_nb_desc;
> + uint16_t sw_tail;
We are wasting lots of space in the structure here by having the fields
placed within it with no regard for alignment. If the "q_started" variable
is kept as-is, it wastes 7 bytes of padding after it. These two variables
waste 4 bytes of padding after them, and there are similarly 3 bytes wasted
after "expected_gen_id". Reordering the fields alone will bring the size
down by 8 bytes (with 6 bytes of padding remaining at the end).
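For example, a sketch of one such reordering, just sorting the existing
members by alignment requirement:

	struct { /* idpf specific values */
		volatile union {
			struct idpf_flex_tx_sched_desc *desc_ring;
			struct idpf_splitq_tx_compl_desc *compl_ring;
		};
		const struct idpf_txq_ops *idpf_ops;
		void **txqs;
		struct ci_tx_queue *complq;
		uint32_t tx_start_qid;
		/* only valid for split queue mode */
		uint16_t sw_nb_desc;
		uint16_t sw_tail;
		uint16_t ctype[IDPF_TX_CTYPE_NUM]; /* NUM == 8 */
		uint8_t expected_gen_id;
		bool q_started;
	};

That packs the four 8-byte members first, then the 4-, 2- and 1-byte ones,
giving 64 bytes rather than 72.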
For the sw_nb_desc field - is this not the same as the "nb_tx_desc" field?
For sw_tail - is this not the same as "tx_tail"?
> + void **txqs;
> + uint32_t tx_start_qid;
> + uint8_t expected_gen_id;
> + struct ci_tx_queue *complq;
> +#define IDPF_TX_CTYPE_NUM 8
> + uint16_t ctype[IDPF_TX_CTYPE_NUM];
> +
> + };
If some of these fields are only relevant for the splitq model, or when
using a queue with timestamps or scheduling, would there be a large impact
from splitting them off into a separate structure, pointed to by the
general tx queue structure? To avoid expanding the struct size by a lot for
all drivers, it would be good if we can keep the idpf-specific data to
32 bytes or smaller (ideally 24 bytes, which would mean no increase in the
union size at all!)
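To illustrate, a rough sketch of the split-off approach - the struct and
field names here are hypothetical, and it assumes "q_started" is dropped as
queried above:

	/* allocated at queue setup, only for splitq-model queues */
	struct idpf_splitq_tx_extra {
		void **txqs;
		struct ci_tx_queue *complq;
		uint32_t tx_start_qid;
		uint16_t sw_nb_desc;
		uint16_t sw_tail;
		uint16_t ctype[IDPF_TX_CTYPE_NUM];
		uint8_t expected_gen_id;
	};

	struct { /* idpf specific values - three pointers, 24 bytes */
		volatile union {
			struct idpf_flex_tx_sched_desc *desc_ring;
			struct idpf_splitq_tx_compl_desc *compl_ring;
		};
		const struct idpf_txq_ops *idpf_ops;
		struct idpf_splitq_tx_extra *splitq; /* NULL in singleq model */
	};

Singleq users then pay nothing for the splitq-only state.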
> };
> };
>
> diff --git a/drivers/net/intel/cpfl/cpfl_ethdev.c b/drivers/net/intel/cpfl/cpfl_ethdev.c
> index 1817221652..c67ccf6b53 100644
> --- a/drivers/net/intel/cpfl/cpfl_ethdev.c
> +++ b/drivers/net/intel/cpfl/cpfl_ethdev.c
> @@ -18,6 +18,7 @@
> #include "cpfl_rxtx.h"
> #include "cpfl_flow.h"
> #include "cpfl_rules.h"
> +#include "../common/tx.h"
>
> #define CPFL_REPRESENTOR "representor"
> #define CPFL_TX_SINGLE_Q "tx_single"
> diff --git a/drivers/net/intel/cpfl/cpfl_ethdev.h b/drivers/net/intel/cpfl/cpfl_ethdev.h
> index 9a38a69194..d4e1176ab1 100644
> --- a/drivers/net/intel/cpfl/cpfl_ethdev.h
> +++ b/drivers/net/intel/cpfl/cpfl_ethdev.h
> @@ -174,7 +174,7 @@ struct cpfl_vport {
> uint16_t nb_p2p_txq;
>
> struct idpf_rx_queue *p2p_rx_bufq;
> - struct idpf_tx_queue *p2p_tx_complq;
> + struct ci_tx_queue *p2p_tx_complq;
> bool p2p_manual_bind;
> };
>
> diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c
> index 47351ca102..d7b5a660b5 100644
> --- a/drivers/net/intel/cpfl/cpfl_rxtx.c
> +++ b/drivers/net/intel/cpfl/cpfl_rxtx.c
> @@ -11,7 +11,7 @@
> #include "cpfl_rxtx_vec_common.h"
>
> static inline void
> -cpfl_tx_hairpin_descq_reset(struct idpf_tx_queue *txq)
> +cpfl_tx_hairpin_descq_reset(struct ci_tx_queue *txq)
> {
> uint32_t i, size;
>
> @@ -26,7 +26,7 @@ cpfl_tx_hairpin_descq_reset(struct idpf_tx_queue *txq)
> }
>
> static inline void
> -cpfl_tx_hairpin_complq_reset(struct idpf_tx_queue *cq)
> +cpfl_tx_hairpin_complq_reset(struct ci_tx_queue *cq)
> {
> uint32_t i, size;
>
> @@ -249,7 +249,7 @@ cpfl_rx_split_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *rxq,
> idpf_qc_split_rx_bufq_reset(bufq);
> bufq->qrx_tail = hw->hw_addr + (vport->chunks_info.rx_buf_qtail_start +
> queue_idx * vport->chunks_info.rx_buf_qtail_spacing);
> - bufq->ops = &def_rxq_ops;
> + bufq->idpf_ops = &def_rxq_ops;
> bufq->q_set = true;
>
> if (bufq_id == IDPF_RX_SPLIT_BUFQ1_ID) {
> @@ -310,7 +310,7 @@ cpfl_rx_queue_release(void *rxq)
> }
>
> /* Single queue */
> - q->ops->release_mbufs(q);
> + q->idpf_ops->release_mbufs(q);
Looking through the code, the only entry in the ops structure is the mbuf
release function. Presumably this is to account for the AVX512 vector code
vs non-AVX512 code having different software ring structures. Based on what
we did for the other drivers, we should be ok to just use a flag for this -
something that uses only 1 byte in the txq struct rather than 8 for a
pointer. Having a flag is also multi-process safe - using a function
pointer will break in multi-process scenarios.
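As a minimal sketch - the flag and helper names below are hypothetical,
not existing API:

	/* in ci_tx_queue: "bool vector_sw_ring;", set once at queue setup */
	static void
	idpf_txq_release_mbufs(struct ci_tx_queue *txq)
	{
		if (txq->vector_sw_ring)
			release_mbufs_vec(txq);    /* AVX512 sw ring layout */
		else
			release_mbufs_scalar(txq); /* regular sw ring layout */
	}

Being a plain value rather than a pointer, the flag also reads the same in
secondary processes.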
> rte_free(q->sw_ring);
> rte_memzone_free(q->mz);
> rte_free(cpfl_rxq);
> @@ -320,7 +320,7 @@ static void
> cpfl_tx_queue_release(void *txq)
> {
> struct cpfl_tx_queue *cpfl_txq = txq;
> - struct idpf_tx_queue *q = NULL;
> + struct ci_tx_queue *q = NULL;
>
> if (cpfl_txq == NULL)
> return;
> @@ -332,7 +332,7 @@ cpfl_tx_queue_release(void *txq)
> rte_free(q->complq);
> }
>
> - q->ops->release_mbufs(q);
> + q->idpf_ops->release_mbufs(q);
> rte_free(q->sw_ring);
> rte_memzone_free(q->mz);
> rte_free(cpfl_txq);
> @@ -426,7 +426,7 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> idpf_qc_single_rx_queue_reset(rxq);
> rxq->qrx_tail = hw->hw_addr + (vport->chunks_info.rx_qtail_start +
> queue_idx * vport->chunks_info.rx_qtail_spacing);
> - rxq->ops = &def_rxq_ops;
> + rxq->idpf_ops = &def_rxq_ops;
> } else {
> idpf_qc_split_rx_descq_reset(rxq);
>
> @@ -468,18 +468,18 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> }
>
> static int
> -cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct idpf_tx_queue *txq,
> +cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct ci_tx_queue *txq,
> uint16_t queue_idx, uint16_t nb_desc,
> unsigned int socket_id)
> {
> struct cpfl_vport *cpfl_vport = dev->data->dev_private;
> struct idpf_vport *vport = &cpfl_vport->base;
> const struct rte_memzone *mz;
> - struct idpf_tx_queue *cq;
> + struct ci_tx_queue *cq;
> int ret;
>
> cq = rte_zmalloc_socket("cpfl splitq cq",
> - sizeof(struct idpf_tx_queue),
> + sizeof(struct ci_tx_queue),
> RTE_CACHE_LINE_SIZE,
> socket_id);
> if (cq == NULL) {
> @@ -501,7 +501,7 @@ cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct idpf_tx_queue *txq,
> ret = -ENOMEM;
> goto err_mz_reserve;
> }
> - cq->tx_ring_phys_addr = mz->iova;
> + cq->tx_ring_dma = mz->iova;
> cq->compl_ring = mz->addr;
> cq->mz = mz;
> idpf_qc_split_tx_complq_reset(cq);
> @@ -528,7 +528,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> struct cpfl_tx_queue *cpfl_txq;
> struct idpf_hw *hw = &base->hw;
> const struct rte_memzone *mz;
> - struct idpf_tx_queue *txq;
> + struct ci_tx_queue *txq;
> uint64_t offloads;
> uint16_t len;
> bool is_splitq;
> @@ -565,8 +565,8 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> is_splitq = !!(vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT);
>
> txq->nb_tx_desc = nb_desc;
> - txq->rs_thresh = tx_rs_thresh;
> - txq->free_thresh = tx_free_thresh;
> + txq->tx_rs_thresh = tx_rs_thresh;
> + txq->tx_free_thresh = tx_free_thresh;
Rather than one big patch, as here, the process of changing the code to use
the common functions might be better done in stages across a couple of
patches (as was done for the other drivers).
For example, a good first patch would be to keep the separate txq structure
in idpf, but rename any fields that need it to align with the common
structure names. Then later patches which swap the dedicated structure for
the common one are simpler and only need to worry about the structure
names, not the field names.
> txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
> txq->port_id = dev->data->port_id;
> txq->offloads = cpfl_tx_offload_convert(offloads);
> @@ -585,11 +585,11 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> ret = -ENOMEM;
> goto err_mz_reserve;
> }
> - txq->tx_ring_phys_addr = mz->iova;
> + txq->tx_ring_dma = mz->iova;
> txq->mz = mz;
>
> txq->sw_ring = rte_zmalloc_socket("cpfl tx sw ring",
> - sizeof(struct idpf_tx_entry) * len,
> + sizeof(struct ci_tx_entry) * len,
> RTE_CACHE_LINE_SIZE, socket_id);
> if (txq->sw_ring == NULL) {
> PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
> @@ -598,7 +598,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> }
>
> if (!is_splitq) {
> - txq->tx_ring = mz->addr;
> + txq->idpf_tx_ring = mz->addr;
> idpf_qc_single_tx_queue_reset(txq);
> } else {
> txq->desc_ring = mz->addr;
> @@ -613,7 +613,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
>
> txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> queue_idx * vport->chunks_info.tx_qtail_spacing);
> - txq->ops = &def_txq_ops;
> + txq->idpf_ops = &def_txq_ops;
> cpfl_vport->nb_data_txq++;
> txq->q_set = true;
> dev->data->tx_queues[queue_idx] = cpfl_txq;
> @@ -663,7 +663,7 @@ cpfl_rx_hairpin_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
> bufq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
>
> bufq->q_set = true;
> - bufq->ops = &def_rxq_ops;
> + bufq->idpf_ops = &def_rxq_ops;
>
> return 0;
> }
<snip for brevity>