DPDK patches and discussions
 help / color / mirror / Atom feed
From: Slava Ovsiienko <viacheslavo@nvidia.com>
To: Michael Baum <michaelba@nvidia.com>, "dev@dpdk.org" <dev@dpdk.org>
Cc: Matan Azrad <matan@nvidia.com>, Raslan Darawsheh <rasland@nvidia.com>
Subject: Re: [dpdk-dev] [PATCH 2/6] net/mlx5: separate Rx function implementations to new file
Date: Tue, 6 Apr 2021 09:27:59 +0000
Message-ID: <DM6PR12MB3753DADA41BAB1D82696413CDF769@DM6PR12MB3753.namprd12.prod.outlook.com> (raw)
In-Reply-To: <1617631256-3018-3-git-send-email-michaelba@nvidia.com>

> -----Original Message-----
> From: Michael Baum <michaelba@nvidia.com>
> Sent: Monday, April 5, 2021 17:01
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Subject: [PATCH 2/6] net/mlx5: separate Rx function implementations to
> new file
> 
> This patch separates Rx function implementations to different source
> file as an optional preparation step for further consolidation of Rx
> burst functions.
> 
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>


> ---
>  drivers/net/mlx5/meson.build |    1 +
>  drivers/net/mlx5/mlx5_rx.c   | 1203 ++++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_rx.h   |    4 +-
>  drivers/net/mlx5/mlx5_rxtx.c | 1179 +----------------------------------------
>  drivers/net/mlx5/mlx5_rxtx.h |    2 +
>  5 files changed, 1209 insertions(+), 1180 deletions(-)
>  create mode 100644 drivers/net/mlx5/mlx5_rx.c
> 
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index f2fafbd..0a89a27 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -19,6 +19,7 @@ sources = files(
>  	'mlx5_mac.c',
>  	'mlx5_mr.c',
>  	'mlx5_rss.c',
> +    'mlx5_rx.c',
>  	'mlx5_rxmode.c',
>  	'mlx5_rxq.c',
>  	'mlx5_rxtx.c',
> diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> new file mode 100644
> index 0000000..e9fcb52
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -0,0 +1,1203 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2021 6WIND S.A.
> + * Copyright 2021 Mellanox Technologies, Ltd
> + */
> +
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdlib.h>
> +
> +#include <rte_mbuf.h>
> +#include <rte_mempool.h>
> +#include <rte_prefetch.h>
> +#include <rte_common.h>
> +#include <rte_branch_prediction.h>
> +#include <rte_ether.h>
> +#include <rte_cycles.h>
> +#include <rte_flow.h>
> +
> +#include <mlx5_prm.h>
> +#include <mlx5_common.h>
> +
> +#include "mlx5_autoconf.h"
> +#include "mlx5_defs.h"
> +#include "mlx5.h"
> +#include "mlx5_mr.h"
> +#include "mlx5_utils.h"
> +#include "mlx5_rxtx.h"
> +#include "mlx5_rx.h"
> +
> +
> +static __rte_always_inline uint32_t
> +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe
> *cqe,
> +		   volatile struct mlx5_mini_cqe8 *mcqe);
> +
> +static __rte_always_inline int
> +mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> +		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
> +
> +static __rte_always_inline uint32_t
> +rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
> +
> +static __rte_always_inline void
> +rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
> +	       volatile struct mlx5_cqe *cqe,
> +	       volatile struct mlx5_mini_cqe8 *mcqe);
> +
> +static inline void
> +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
> +			volatile struct mlx5_cqe *__rte_restrict cqe,
> +			uint32_t phcsum, uint8_t l4_type);
> +
> +static inline void
> +mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
> +		    volatile struct mlx5_cqe *__rte_restrict cqe,
> +		    volatile struct mlx5_mini_cqe8 *mcqe,
> +		    struct mlx5_rxq_data *rxq, uint32_t len);
> +
> +
> +/**
> + * Internal function to compute the number of used descriptors in an RX queue.
> + *
> + * @param rxq
> + *   The Rx queue.
> + *
> + * @return
> + *   The number of used Rx descriptors.
> + */
> +static uint32_t
> +rx_queue_count(struct mlx5_rxq_data *rxq)
> +{
> +	struct rxq_zip *zip = &rxq->zip;
> +	volatile struct mlx5_cqe *cqe;
> +	const unsigned int cqe_n = (1 << rxq->cqe_n);
> +	const unsigned int sges_n = (1 << rxq->sges_n);
> +	const unsigned int elts_n = (1 << rxq->elts_n);
> +	const unsigned int strd_n = (1 << rxq->strd_num_n);
> +	const unsigned int cqe_cnt = cqe_n - 1;
> +	unsigned int cq_ci, used;
> +
> +	/* if we are processing a compressed cqe */
> +	if (zip->ai) {
> +		used = zip->cqe_cnt - zip->ai;
> +		cq_ci = zip->cq_ci;
> +	} else {
> +		used = 0;
> +		cq_ci = rxq->cq_ci;
> +	}
> +	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
> +	while (check_cqe(cqe, cqe_n, cq_ci) !=
> MLX5_CQE_STATUS_HW_OWN) {
> +		int8_t op_own;
> +		unsigned int n;
> +
> +		op_own = cqe->op_own;
> +		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
> +			n = rte_be_to_cpu_32(cqe->byte_cnt);
> +		else
> +			n = 1;
> +		cq_ci += n;
> +		used += n;
> +		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
> +	}
> +	used = RTE_MIN(used * sges_n, elts_n * strd_n);
> +	return used;
> +}
> +
> +/**
> + * DPDK callback to check the status of a Rx descriptor.
> + *
> + * @param rx_queue
> + *   The Rx queue.
> + * @param[in] offset
> + *   The index of the descriptor in the ring.
> + *
> + * @return
> + *   The status of the Rx descriptor.
> + */
> +int
> +mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
> +{
> +	struct mlx5_rxq_data *rxq = rx_queue;
> +	struct mlx5_rxq_ctrl *rxq_ctrl =
> +			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> +	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
> +
> +	if (dev->rx_pkt_burst == NULL ||
> +	    dev->rx_pkt_burst == removed_rx_burst) {
> +		rte_errno = ENOTSUP;
> +		return -rte_errno;
> +	}
> +	if (offset >= (1 << rxq->cqe_n)) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	if (offset < rx_queue_count(rxq))
> +		return RTE_ETH_RX_DESC_DONE;
> +	return RTE_ETH_RX_DESC_AVAIL;
> +}
> +
> +/**
> + * DPDK callback to get the RX queue information.
> + *
> + * @param dev
> + *   Pointer to the device structure.
> + *
> + * @param rx_queue_id
> + *   Rx queue identifier.
> + *
> + * @param qinfo
> + *   Pointer to the RX queue information structure.
> + *
> + * @return
> + *   None.
> + */
> +
> +void
> +mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +		  struct rte_eth_rxq_info *qinfo)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
> +	struct mlx5_rxq_ctrl *rxq_ctrl =
> +		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> +
> +	if (!rxq)
> +		return;
> +	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
> +					rxq->mprq_mp : rxq->mp;
> +	qinfo->conf.rx_thresh.pthresh = 0;
> +	qinfo->conf.rx_thresh.hthresh = 0;
> +	qinfo->conf.rx_thresh.wthresh = 0;
> +	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
> +	qinfo->conf.rx_drop_en = 1;
> +	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
> +	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
> +	qinfo->scattered_rx = dev->data->scattered_rx;
> +	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
> +		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
> +		(1 << rxq->elts_n);
> +}
> +
> +/**
> + * DPDK callback to get the RX packet burst mode information.
> + *
> + * @param dev
> + *   Pointer to the device structure.
> + *
> + * @param rx_queue_id
> + *   Rx queue identifier.
> + *
> + * @param mode
> + *   Pointer to the burst mode information.
> + *
> + * @return
> + *   0 as success, -EINVAL as failure.
> + */
> +int
> +mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
> +		       uint16_t rx_queue_id __rte_unused,
> +		       struct rte_eth_burst_mode *mode)
> +{
> +	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_rxq_data *rxq;
> +
> +	rxq = (*priv->rxqs)[rx_queue_id];
> +	if (!rxq) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	if (pkt_burst == mlx5_rx_burst) {
> +		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
> +	} else if (pkt_burst == mlx5_rx_burst_mprq) {
> +		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-
> Packet RQ");
> +	} else if (pkt_burst == mlx5_rx_burst_vec) {
> +#if defined RTE_ARCH_X86_64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "Vector
> SSE");
> +#elif defined RTE_ARCH_ARM64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "Vector
> Neon");
> +#elif defined RTE_ARCH_PPC_64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "Vector
> AltiVec");
> +#else
> +		return -EINVAL;
> +#endif
> +	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
> +#if defined RTE_ARCH_X86_64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ
> Vector SSE");
> +#elif defined RTE_ARCH_ARM64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ
> Vector Neon");
> +#elif defined RTE_ARCH_PPC_64
> +		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ
> Vector AltiVec");
> +#else
> +		return -EINVAL;
> +#endif
> +	} else {
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +/**
> + * DPDK callback to get the number of used descriptors in a RX queue.
> + *
> + * @param dev
> + *   Pointer to the device structure.
> + *
> + * @param rx_queue_id
> + *   The Rx queue.
> + *
> + * @return
> + *   The number of used Rx descriptors.
> + *   -ENOTSUP if the Rx burst callback is NULL or is removed_rx_burst,
> + *   -EINVAL if the queue is invalid.
> + */
> +uint32_t
> +mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_rxq_data *rxq;
> +
> +	if (dev->rx_pkt_burst == NULL ||
> +	    dev->rx_pkt_burst == removed_rx_burst) {
> +		rte_errno = ENOTSUP;
> +		return -rte_errno;
> +	}
> +	rxq = (*priv->rxqs)[rx_queue_id];
> +	if (!rxq) {
> +		rte_errno = EINVAL;
> +		return -rte_errno;
> +	}
> +	return rx_queue_count(rxq);
> +}
> +
> +/**
> + * Translate RX completion flags to packet type.
> + *
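> + * @param[in] rxq
> + *   Pointer to RX queue structure.
> + * @param[in] cqe
> + *   Pointer to CQE.
> + * @param[in] mcqe
> + *   Pointer to the mini-CQE, or NULL. The header type is read from it only
> + *   when it is non-NULL and the queue uses the L3/L4 header mini-CQE format;
> + *   otherwise it is read from the CQE.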
> + *
> + * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
> + *
> + * @return
> + *   Packet type for struct rte_mbuf.
> + */
> +static inline uint32_t
> +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe
> *cqe,
> +				   volatile struct mlx5_mini_cqe8 *mcqe)
> +{
> +	uint8_t idx;
> +	uint8_t ptype;
> +	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
> +
> +	/* Get l3/l4 header from mini-CQE in case L3/L4 format*/
> +	if (mcqe == NULL ||
> +	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> +		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
> +	else
> +		ptype = mcqe->hdr_type >> 2;
> +	/*
> +	 * The index to the array should have:
> +	 * bit[1:0] = l3_hdr_type
> +	 * bit[4:2] = l4_hdr_type
> +	 * bit[5] = ip_frag
> +	 * bit[6] = tunneled
> +	 * bit[7] = outer_l3_type
> +	 */
> +	idx = pinfo | ptype;
> +	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
> +}
> +
> +/**
> + * Initialize Rx WQ and indexes.
> + *
> + * @param[in] rxq
> + *   Pointer to RX queue structure.
> + */
> +void
> +mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
> +{
> +	const unsigned int wqe_n = 1 << rxq->elts_n;
> +	unsigned int i;
> +
> +	for (i = 0; (i != wqe_n); ++i) {
> +		volatile struct mlx5_wqe_data_seg *scat;
> +		uintptr_t addr;
> +		uint32_t byte_count;
> +
> +		if (mlx5_rxq_mprq_enabled(rxq)) {
> +			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
> +
> +			scat = &((volatile struct mlx5_wqe_mprq *)
> +				rxq->wqes)[i].dseg;
> +			addr = (uintptr_t)mlx5_mprq_buf_addr(buf,
> +							 1 << rxq-
> >strd_num_n);
> +			byte_count = (1 << rxq->strd_sz_n) *
> +					(1 << rxq->strd_num_n);
> +		} else {
> +			struct rte_mbuf *buf = (*rxq->elts)[i];
> +
> +			scat = &((volatile struct mlx5_wqe_data_seg *)
> +					rxq->wqes)[i];
> +			addr = rte_pktmbuf_mtod(buf, uintptr_t);
> +			byte_count = DATA_LEN(buf);
> +		}
> +		/* scat->addr must be able to store a pointer. */
> +		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
> +		*scat = (struct mlx5_wqe_data_seg){
> +			.addr = rte_cpu_to_be_64(addr),
> +			.byte_count = rte_cpu_to_be_32(byte_count),
> +			.lkey = mlx5_rx_addr2mr(rxq, addr),
> +		};
> +	}
> +	rxq->consumed_strd = 0;
> +	rxq->decompressed = 0;
> +	rxq->rq_pi = 0;
> +	rxq->zip = (struct rxq_zip){
> +		.ai = 0,
> +	};
> +	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
> +		(wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
> +	/* Update doorbell counter. */
> +	rxq->rq_ci = wqe_n >> rxq->sges_n;
> +	rte_io_wmb();
> +	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> +}
> +
> +/**
> + * Handle a Rx error.
> + * The function moves the RQ to the reset state when the first error CQE is
> + * seen; the CQ is then drained by the caller's loop. When the CQ is empty,
> + * it moves the RQ to the ready state and reinitializes the RQ.
> + * Identifying the next CQE and counting errors are the caller's
> + * responsibility.
> + *
> + * @param[in] rxq
> + *   Pointer to RX queue structure.
> + * @param[in] vec
> + *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
> + *   0 when called from non-vectorized Rx burst.
> + *
> + * @return
> + *   -1 in case of recovery error, otherwise the CQE status.
> + */
> +int
> +mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
> +{
> +	const uint16_t cqe_n = 1 << rxq->cqe_n;
> +	const uint16_t cqe_mask = cqe_n - 1;
> +	const uint16_t wqe_n = 1 << rxq->elts_n;
> +	const uint16_t strd_n = 1 << rxq->strd_num_n;
> +	struct mlx5_rxq_ctrl *rxq_ctrl =
> +			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> +	union {
> +		volatile struct mlx5_cqe *cqe;
> +		volatile struct mlx5_err_cqe *err_cqe;
> +	} u = {
> +		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
> +	};
> +	struct mlx5_mp_arg_queue_state_modify sm;
> +	int ret;
> +
> +	switch (rxq->err_state) {
> +	case MLX5_RXQ_ERR_STATE_NO_ERROR:
> +		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
> +		/* Fall-through */
> +	case MLX5_RXQ_ERR_STATE_NEED_RESET:
> +		sm.is_wq = 1;
> +		sm.queue_id = rxq->idx;
> +		sm.state = IBV_WQS_RESET;
> +		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
> &sm))
> +			return -1;
> +		if (rxq_ctrl->dump_file_n <
> +		    rxq_ctrl->priv->config.max_dump_files_num) {
> +			MKSTR(err_str, "Unexpected CQE error syndrome "
> +			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
> +			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
> +			      rxq->cqn, rxq_ctrl->wqn,
> +			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
> +			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
> +			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
> +			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
> +			mlx5_dump_debug_information(name, NULL,
> err_str, 0);
> +			mlx5_dump_debug_information(name, "MLX5 Error
> CQ:",
> +						    (const void *)((uintptr_t)
> +								    rxq->cqes),
> +						    sizeof(*u.cqe) * cqe_n);
> +			mlx5_dump_debug_information(name, "MLX5 Error
> RQ:",
> +						    (const void *)((uintptr_t)
> +								    rxq-
> >wqes),
> +						    16 * wqe_n);
> +			rxq_ctrl->dump_file_n++;
> +		}
> +		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
> +		/* Fall-through */
> +	case MLX5_RXQ_ERR_STATE_NEED_READY:
> +		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
> +		if (ret == MLX5_CQE_STATUS_HW_OWN) {
> +			rte_io_wmb();
> +			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +			rte_io_wmb();
> +			/*
> +			 * The RQ consumer index must be zeroed while
> moving
> +			 * from RESET state to RDY state.
> +			 */
> +			*rxq->rq_db = rte_cpu_to_be_32(0);
> +			rte_io_wmb();
> +			sm.is_wq = 1;
> +			sm.queue_id = rxq->idx;
> +			sm.state = IBV_WQS_RDY;
> +			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl-
> >priv),
> +						    &sm))
> +				return -1;
> +			if (vec) {
> +				const uint32_t elts_n =
> +					mlx5_rxq_mprq_enabled(rxq) ?
> +					wqe_n * strd_n : wqe_n;
> +				const uint32_t e_mask = elts_n - 1;
> +				uint32_t elts_ci =
> +					mlx5_rxq_mprq_enabled(rxq) ?
> +					rxq->elts_ci : rxq->rq_ci;
> +				uint32_t elt_idx;
> +				struct rte_mbuf **elt;
> +				int i;
> +				unsigned int n = elts_n - (elts_ci -
> +							  rxq->rq_pi);
> +
> +				for (i = 0; i < (int)n; ++i) {
> +					elt_idx = (elts_ci + i) & e_mask;
> +					elt = &(*rxq->elts)[elt_idx];
> +					*elt = rte_mbuf_raw_alloc(rxq->mp);
> +					if (!*elt) {
> +						for (i--; i >= 0; --i) {
> +							elt_idx = (elts_ci +
> +								   i) & elts_n;
> +							elt = &(*rxq->elts)
> +								[elt_idx];
> +
> 	rte_pktmbuf_free_seg
> +								(*elt);
> +						}
> +						return -1;
> +					}
> +				}
> +				for (i = 0; i < (int)elts_n; ++i) {
> +					elt = &(*rxq->elts)[i];
> +					DATA_LEN(*elt) =
> +						(uint16_t)((*elt)->buf_len -
> +
> 	rte_pktmbuf_headroom(*elt));
> +				}
> +				/* Padding with a fake mbuf for vec Rx. */
> +				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP;
> ++i)
> +					(*rxq->elts)[elts_n + i] =
> +								&rxq-
> >fake_mbuf;
> +			}
> +			mlx5_rxq_initialize(rxq);
> +			rxq->err_state =
> MLX5_RXQ_ERR_STATE_NO_ERROR;
> +		}
> +		return ret;
> +	default:
> +		return -1;
> +	}
> +}
> +
> +/**
> + * Get size of the next packet for a given CQE. For compressed CQEs, the
> + * consumer index is updated only once all packets of the current one have
> + * been processed.
> + *
> + * @param rxq
> + *   Pointer to RX queue.
> + * @param cqe
> + *   CQE to process.
> + * @param cqe_cnt
> + *   CQ index mask, i.e. the number of CQEs in the ring minus one.
> + * @param[out] mcqe
> + *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
> + *   written.
> + *
> + * @return
> + *   0 in case of empty CQE, otherwise the packet size in bytes.
> + */
> +static inline int
> +mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> +		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
> +{
> +	struct rxq_zip *zip = &rxq->zip;
> +	uint16_t cqe_n = cqe_cnt + 1;
> +	int len;
> +	uint16_t idx, end;
> +
> +	do {
> +		len = 0;
> +		/* Process compressed data in the CQE and mini arrays. */
> +		if (zip->ai) {
> +			volatile struct mlx5_mini_cqe8 (*mc)[8] =
> +				(volatile struct mlx5_mini_cqe8 (*)[8])
> +				(uintptr_t)(&(*rxq->cqes)[zip->ca &
> +							  cqe_cnt].pkt_info);
> +			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt
> &
> +					       rxq->byte_mask);
> +			*mcqe = &(*mc)[zip->ai & 7];
> +			if ((++zip->ai & 7) == 0) {
> +				/* Invalidate consumed CQEs */
> +				idx = zip->ca;
> +				end = zip->na;
> +				while (idx != end) {
> +					(*rxq->cqes)[idx & cqe_cnt].op_own
> =
> +						MLX5_CQE_INVALIDATE;
> +					++idx;
> +				}
> +				/*
> +				 * Increment consumer index to skip the
> number
> +				 * of CQEs consumed. Hardware leaves holes
> in
> +				 * the CQ ring for software use.
> +				 */
> +				zip->ca = zip->na;
> +				zip->na += 8;
> +			}
> +			if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
> +				/* Invalidate the rest */
> +				idx = zip->ca;
> +				end = zip->cq_ci;
> +
> +				while (idx != end) {
> +					(*rxq->cqes)[idx & cqe_cnt].op_own
> =
> +						MLX5_CQE_INVALIDATE;
> +					++idx;
> +				}
> +				rxq->cq_ci = zip->cq_ci;
> +				zip->ai = 0;
> +			}
> +		/*
> +		 * No compressed data, get next CQE and verify if it is
> +		 * compressed.
> +		 */
> +		} else {
> +			int ret;
> +			int8_t op_own;
> +			uint32_t cq_ci;
> +
> +			ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
> +			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
> +				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
> +					     rxq->err_state)) {
> +					ret = mlx5_rx_err_handle(rxq, 0);
> +					if (ret ==
> MLX5_CQE_STATUS_HW_OWN ||
> +					    ret == -1)
> +						return 0;
> +				} else {
> +					return 0;
> +				}
> +			}
> +			/*
> +			 * Introduce the local variable to have queue cq_ci
> +			 * index in queue structure always consistent with
> +			 * actual CQE boundary (not pointing to the middle
> +			 * of compressed CQE session).
> +			 */
> +			cq_ci = rxq->cq_ci + 1;
> +			op_own = cqe->op_own;
> +			if (MLX5_CQE_FORMAT(op_own) ==
> MLX5_COMPRESSED) {
> +				volatile struct mlx5_mini_cqe8 (*mc)[8] =
> +					(volatile struct mlx5_mini_cqe8
> (*)[8])
> +					(uintptr_t)(&(*rxq->cqes)
> +						[cq_ci & cqe_cnt].pkt_info);
> +
> +				/* Fix endianness. */
> +				zip->cqe_cnt = rte_be_to_cpu_32(cqe-
> >byte_cnt);
> +				/*
> +				 * Current mini array position is the one
> +				 * returned by check_cqe64().
> +				 *
> +				 * If completion comprises several mini
> arrays,
> +				 * as a special case the second one is located
> +				 * 7 CQEs after the initial CQE instead of 8
> +				 * for subsequent ones.
> +				 */
> +				zip->ca = cq_ci;
> +				zip->na = zip->ca + 7;
> +				/* Compute the next non compressed CQE.
> */
> +				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
> +				/* Get packet size to return. */
> +				len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
> +						       rxq->byte_mask);
> +				*mcqe = &(*mc)[0];
> +				zip->ai = 1;
> +				/* Prefetch all to be invalidated */
> +				idx = zip->ca;
> +				end = zip->cq_ci;
> +				while (idx != end) {
> +					rte_prefetch0(&(*rxq->cqes)[(idx) &
> +								    cqe_cnt]);
> +					++idx;
> +				}
> +			} else {
> +				rxq->cq_ci = cq_ci;
> +				len = rte_be_to_cpu_32(cqe->byte_cnt);
> +			}
> +		}
> +		if (unlikely(rxq->err_state)) {
> +			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> +			++rxq->stats.idropped;
> +		} else {
> +			return len;
> +		}
> +	} while (1);
> +}
> +
> +/**
> + * Translate RX completion flags to offload flags.
> + *
> + * @param[in] cqe
> + *   Pointer to CQE.
> + *
> + * @return
> + *   Offload flags (ol_flags) for struct rte_mbuf.
> + */
> +static inline uint32_t
> +rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
> +{
> +	uint32_t ol_flags = 0;
> +	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
> +
> +	ol_flags =
> +		TRANSPOSE(flags,
> +			  MLX5_CQE_RX_L3_HDR_VALID,
> +			  PKT_RX_IP_CKSUM_GOOD) |
> +		TRANSPOSE(flags,
> +			  MLX5_CQE_RX_L4_HDR_VALID,
> +			  PKT_RX_L4_CKSUM_GOOD);
> +	return ol_flags;
> +}
> +
> +/**
> + * Fill in mbuf fields from RX completion flags.
> + * Note that pkt->ol_flags should be initialized outside of this function.
> + *
> + * @param rxq
> + *   Pointer to RX queue.
> + * @param pkt
> + *   mbuf to fill.
> + * @param cqe
> + *   CQE to process.
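> + * @param mcqe
> + *   Pointer to the mini-CQE, or NULL. When non-NULL, the fields provided by
> + *   the queue's mini-CQE format are taken from it instead of the CQE.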
> + */
> +static inline void
> +rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
> +	       volatile struct mlx5_cqe *cqe,
> +	       volatile struct mlx5_mini_cqe8 *mcqe)
> +{
> +	/* Update packet information. */
> +	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);
> +
> +	if (rxq->rss_hash) {
> +		uint32_t rss_hash_res = 0;
> +
> +		/* If compressed, take hash result from mini-CQE. */
> +		if (mcqe == NULL ||
> +		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
> +			rss_hash_res = rte_be_to_cpu_32(cqe-
> >rx_hash_res);
> +		else
> +			rss_hash_res = rte_be_to_cpu_32(mcqe-
> >rx_hash_result);
> +		if (rss_hash_res) {
> +			pkt->hash.rss = rss_hash_res;
> +			pkt->ol_flags |= PKT_RX_RSS_HASH;
> +		}
> +	}
> +	if (rxq->mark) {
> +		uint32_t mark = 0;
> +
> +		/* If compressed, take flow tag from mini-CQE. */
> +		if (mcqe == NULL ||
> +		    rxq->mcqe_format !=
> MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
> +			mark = cqe->sop_drop_qpn;
> +		else
> +			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
> +				(mcqe->flow_tag_high << 16);
> +		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
> +			pkt->ol_flags |= PKT_RX_FDIR;
> +			if (mark !=
> RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> +				pkt->ol_flags |= PKT_RX_FDIR_ID;
> +				pkt->hash.fdir.hi =
> mlx5_flow_mark_get(mark);
> +			}
> +		}
> +	}
> +	if (rxq->dynf_meta) {
> +		uint32_t meta = cqe->flow_table_metadata &
> +				rxq->flow_meta_port_mask;
> +
> +		if (meta) {
> +			pkt->ol_flags |= rxq->flow_meta_mask;
> +			*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
> +						uint32_t *) = meta;
> +		}
> +	}
> +	if (rxq->csum)
> +		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
> +	if (rxq->vlan_strip) {
> +		bool vlan_strip;
> +
> +		if (mcqe == NULL ||
> +		    rxq->mcqe_format !=
> MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> +			vlan_strip = cqe->hdr_type_etc &
> +				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
> +		else
> +			vlan_strip = mcqe->hdr_type &
> +				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
> +		if (vlan_strip) {
> +			pkt->ol_flags |= PKT_RX_VLAN |
> PKT_RX_VLAN_STRIPPED;
> +			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
> +		}
> +	}
> +	if (rxq->hw_timestamp) {
> +		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
> +
> +		if (rxq->rt_timestamp)
> +			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
> +		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
> +		pkt->ol_flags |= rxq->timestamp_rx_flag;
> +	}
> +}
> +
> +/**
> + * DPDK callback for RX.
> + *
> + * @param dpdk_rxq
> + *   Generic pointer to RX queue structure.
> + * @param[out] pkts
> + *   Array to store received packets.
> + * @param pkts_n
> + *   Maximum number of packets in array.
> + *
> + * @return
> + *   Number of packets successfully received (<= pkts_n).
> + */
> +uint16_t
> +mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
> +{
> +	struct mlx5_rxq_data *rxq = dpdk_rxq;
> +	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
> +	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
> +	const unsigned int sges_n = rxq->sges_n;
> +	struct rte_mbuf *pkt = NULL;
> +	struct rte_mbuf *seg = NULL;
> +	volatile struct mlx5_cqe *cqe =
> +		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> +	unsigned int i = 0;
> +	unsigned int rq_ci = rxq->rq_ci << sges_n;
> +	int len = 0; /* keep its value across iterations. */
> +
> +	while (pkts_n) {
> +		unsigned int idx = rq_ci & wqe_cnt;
> +		volatile struct mlx5_wqe_data_seg *wqe =
> +			&((volatile struct mlx5_wqe_data_seg *)rxq-
> >wqes)[idx];
> +		struct rte_mbuf *rep = (*rxq->elts)[idx];
> +		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
> +
> +		if (pkt)
> +			NEXT(seg) = rep;
> +		seg = rep;
> +		rte_prefetch0(seg);
> +		rte_prefetch0(cqe);
> +		rte_prefetch0(wqe);
> +		/* Allocate the buf from the same pool. */
> +		rep = rte_mbuf_raw_alloc(seg->pool);
> +		if (unlikely(rep == NULL)) {
> +			++rxq->stats.rx_nombuf;
> +			if (!pkt) {
> +				/*
> +				 * no buffers before we even started,
> +				 * bail out silently.
> +				 */
> +				break;
> +			}
> +			while (pkt != seg) {
> +				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
> +				rep = NEXT(pkt);
> +				NEXT(pkt) = NULL;
> +				NB_SEGS(pkt) = 1;
> +				rte_mbuf_raw_free(pkt);
> +				pkt = rep;
> +			}
> +			rq_ci >>= sges_n;
> +			++rq_ci;
> +			rq_ci <<= sges_n;
> +			break;
> +		}
> +		if (!pkt) {
> +			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> +			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
> +			if (!len) {
> +				rte_mbuf_raw_free(rep);
> +				break;
> +			}
> +			pkt = seg;
> +			MLX5_ASSERT(len >= (rxq->crc_present << 2));
> +			pkt->ol_flags &= EXT_ATTACHED_MBUF;
> +			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
> +			if (rxq->crc_present)
> +				len -= RTE_ETHER_CRC_LEN;
> +			PKT_LEN(pkt) = len;
> +			if (cqe->lro_num_seg > 1) {
> +				mlx5_lro_update_hdr
> +					(rte_pktmbuf_mtod(pkt, uint8_t *),
> cqe,
> +					 mcqe, rxq, len);
> +				pkt->ol_flags |= PKT_RX_LRO;
> +				pkt->tso_segsz = len / cqe->lro_num_seg;
> +			}
> +		}
> +		DATA_LEN(rep) = DATA_LEN(seg);
> +		PKT_LEN(rep) = PKT_LEN(seg);
> +		SET_DATA_OFF(rep, DATA_OFF(seg));
> +		PORT(rep) = PORT(seg);
> +		(*rxq->elts)[idx] = rep;
> +		/*
> +		 * Fill NIC descriptor with the new buffer. The lkey and size
> +		 * of the buffers are already known, only the buffer address
> +		 * changes.
> +		 */
> +		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep,
> uintptr_t));
> +		/* If there's only one MR, no need to replace LKey in WQE. */
> +		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) >
> 1))
> +			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
> +		if (len > DATA_LEN(seg)) {
> +			len -= DATA_LEN(seg);
> +			++NB_SEGS(pkt);
> +			++rq_ci;
> +			continue;
> +		}
> +		DATA_LEN(seg) = len;
> +#ifdef MLX5_PMD_SOFT_COUNTERS
> +		/* Increment bytes counter. */
> +		rxq->stats.ibytes += PKT_LEN(pkt);
> +#endif
> +		/* Return packet. */
> +		*(pkts++) = pkt;
> +		pkt = NULL;
> +		--pkts_n;
> +		++i;
> +		/* Align consumer index to the next stride. */
> +		rq_ci >>= sges_n;
> +		++rq_ci;
> +		rq_ci <<= sges_n;
> +	}
> +	if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci)))
> +		return 0;
> +	/* Update the consumer index. */
> +	rxq->rq_ci = rq_ci >> sges_n;
> +	rte_io_wmb();
> +	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +	rte_io_wmb();
> +	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> +#ifdef MLX5_PMD_SOFT_COUNTERS
> +	/* Increment packets counter. */
> +	rxq->stats.ipackets += i;
> +#endif
> +	return i;
> +}
> +
> +/**
> + * Update LRO packet TCP header.
> + * The HW LRO feature doesn't update the TCP header after coalescing the
> + * TCP segments but supplies information in CQE to fill it by SW.
> + *
> + * @param tcp
> + *   Pointer to the TCP header.
> + * @param cqe
> + *   Pointer to the completion entry.
> + * @param phcsum
> + *   The L3 pseudo-header checksum.
> + * @param l4_type
> + *   The L4 header type extracted from the completion entry.
> + */
> +static inline void
> +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
> +			volatile struct mlx5_cqe *__rte_restrict cqe,
> +			uint32_t phcsum, uint8_t l4_type)
> +{
> +	/*
> +	 * The HW calculates only the TCP payload checksum, need to
> complete
> +	 * the TCP header checksum and the L3 pseudo-header checksum.
> +	 */
> +	uint32_t csum = phcsum + cqe->csum;
> +
> +	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
> +	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
> +		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
> +		tcp->recv_ack = cqe->lro_ack_seq_num;
> +		tcp->rx_win = cqe->lro_tcp_win;
> +	}
> +	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
> +		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
> +	tcp->cksum = 0;
> +	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
> +	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
> +	csum = (~csum) & 0xffff;
> +	if (csum == 0)
> +		csum = 0xffff;
> +	tcp->cksum = csum;
> +}
> +
> +/**
> + * Update LRO packet headers.
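> + * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
> + * TCP segments but supplies information in CQE to fill it by SW.
> + *
> + * @param padd
> + *   The packet address.
> + * @param cqe
> + *   Pointer to the completion entry.
> + * @param mcqe
> + *   Pointer to the mini-CQE, or NULL.
> + * @param rxq
> + *   Pointer to the Rx queue; its mini-CQE format selects whether the L4
> + *   header type is read from the CQE or from the mini-CQE.
> + * @param len
> + *   The packet length.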
> + */
> +static inline void
> +mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
> +		    volatile struct mlx5_cqe *__rte_restrict cqe,
> +		    volatile struct mlx5_mini_cqe8 *mcqe,
> +		    struct mlx5_rxq_data *rxq, uint32_t len)
> +{
> +	union {
> +		struct rte_ether_hdr *eth;
> +		struct rte_vlan_hdr *vlan;
> +		struct rte_ipv4_hdr *ipv4;
> +		struct rte_ipv6_hdr *ipv6;
> +		struct rte_tcp_hdr *tcp;
> +		uint8_t *hdr;
> +	} h = {
> +		.hdr = padd,
> +	};
> +	uint16_t proto = h.eth->ether_type;
> +	uint32_t phcsum;
> +	uint8_t l4_type;
> +
> +	h.eth++;
> +	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
> +	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
> +		proto = h.vlan->eth_proto;
> +		h.vlan++;
> +	}
> +	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
> +		h.ipv4->time_to_live = cqe->lro_min_ttl;
> +		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr -
> padd));
> +		h.ipv4->hdr_checksum = 0;
> +		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
> +		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
> +		h.ipv4++;
> +	} else {
> +		h.ipv6->hop_limits = cqe->lro_min_ttl;
> +		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd)
> -
> +						       sizeof(*h.ipv6));
> +		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
> +		h.ipv6++;
> +	}
> +	if (mcqe == NULL ||
> +	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> +		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
> +			   MLX5_CQE_L4_TYPE_MASK) >>
> MLX5_CQE_L4_TYPE_SHIFT;
> +	else
> +		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
> +			   MLX5_CQE_L4_TYPE_MASK) >>
> MLX5_CQE_L4_TYPE_SHIFT;
> +	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
> +}
> +
> +void
> +mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
> +{
> +	struct mlx5_mprq_buf *buf = opaque;
> +
> +	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
> +		rte_mempool_put(buf->mp, buf);
> +	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
> +					       __ATOMIC_RELAXED) == 0)) {
> +		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
> +		rte_mempool_put(buf->mp, buf);
> +	}
> +}
> +
> +void
> +mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
> +{
> +	mlx5_mprq_buf_free_cb(NULL, buf);
> +}
> +
> +/**
> + * DPDK callback for RX with Multi-Packet RQ support.
> + *
> + * @param dpdk_rxq
> + *   Generic pointer to RX queue structure.
> + * @param[out] pkts
> + *   Array to store received packets.
> + * @param pkts_n
> + *   Maximum number of packets in array.
> + *
> + * @return
> + *   Number of packets successfully received (<= pkts_n).
> + */
> +uint16_t
> +mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
> +{
> +	struct mlx5_rxq_data *rxq = dpdk_rxq;
> +	const uint32_t strd_n = 1 << rxq->strd_num_n;
> +	const uint32_t strd_sz = 1 << rxq->strd_sz_n;
> +	const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
> +	const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
> +	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
> +	unsigned int i = 0;
> +	uint32_t rq_ci = rxq->rq_ci;
> +	uint16_t consumed_strd = rxq->consumed_strd;
> +	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> +
> +	while (i < pkts_n) {
> +		struct rte_mbuf *pkt;
> +		int ret;
> +		uint32_t len;
> +		uint16_t strd_cnt;
> +		uint16_t strd_idx;
> +		uint32_t byte_cnt;
> +		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
> +		enum mlx5_rqx_code rxq_code;
> +
> +		if (consumed_strd == strd_n) {
> +			/* Replace WQE if the buffer is still in use. */
> +			mprq_buf_replace(rxq, rq_ci & wq_mask);
> +			/* Advance to the next WQE. */
> +			consumed_strd = 0;
> +			++rq_ci;
> +			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> +		}
> +		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
> +		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
> +		if (!ret)
> +			break;
> +		byte_cnt = ret;
> +		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
> +		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
> +		if (rxq->crc_present)
> +			len -= RTE_ETHER_CRC_LEN;
> +		if (mcqe &&
> +		    rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
> +			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
> +		else
> +			strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
> +				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
> +		MLX5_ASSERT(strd_cnt);
> +		consumed_strd += strd_cnt;
> +		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
> +			continue;
> +		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
> +					cqe->wqe_counter :
> +					mcqe->stride_idx);
> +		MLX5_ASSERT(strd_idx < strd_n);
> +		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
> +			    wq_mask));
> +		pkt = rte_pktmbuf_alloc(rxq->mp);
> +		if (unlikely(pkt == NULL)) {
> +			++rxq->stats.rx_nombuf;
> +			break;
> +		}
> +		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
> +		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
> +		if (rxq->crc_present)
> +			len -= RTE_ETHER_CRC_LEN;
> +		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
> +					   strd_idx, strd_cnt);
> +		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
> +			rte_pktmbuf_free_seg(pkt);
> +			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
> +				++rxq->stats.idropped;
> +				continue;
> +			}
> +			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
> +				++rxq->stats.rx_nombuf;
> +				break;
> +			}
> +		}
> +		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
> +		if (cqe->lro_num_seg > 1) {
> +			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
> +					    cqe, mcqe, rxq, len);
> +			pkt->ol_flags |= PKT_RX_LRO;
> +			pkt->tso_segsz = len / cqe->lro_num_seg;
> +		}
> +		PKT_LEN(pkt) = len;
> +		PORT(pkt) = rxq->port_id;
> +#ifdef MLX5_PMD_SOFT_COUNTERS
> +		/* Increment bytes counter. */
> +		rxq->stats.ibytes += PKT_LEN(pkt);
> +#endif
> +		/* Return packet. */
> +		*(pkts++) = pkt;
> +		++i;
> +	}
> +	/* Update the consumer indexes. */
> +	rxq->consumed_strd = consumed_strd;
> +	rte_io_wmb();
> +	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +	if (rq_ci != rxq->rq_ci) {
> +		rxq->rq_ci = rq_ci;
> +		rte_io_wmb();
> +		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> +	}
> +#ifdef MLX5_PMD_SOFT_COUNTERS
> +	/* Increment packets counter. */
> +	rxq->stats.ipackets += i;
> +#endif
> +	return i;
> +}
> +
> +/**
> + * Dummy DPDK callback for RX.
> + *
> + * This function is used to temporarily replace the real callback during
> + * unsafe control operations on the queue, or in case of error.
> + *
> + * @param dpdk_rxq
> + *   Generic pointer to RX queue structure.
> + * @param[out] pkts
> + *   Array to store received packets.
> + * @param pkts_n
> + *   Maximum number of packets in array.
> + *
> + * @return
> + *   Number of packets successfully received (<= pkts_n).
> + */
> +uint16_t
> +removed_rx_burst(void *dpdk_rxq __rte_unused,
> +		 struct rte_mbuf **pkts __rte_unused,
> +		 uint16_t pkts_n __rte_unused)
> +{
> +	rte_mb();
> +	return 0;
> +}
> +
> +/*
> + * Vectorized Rx routines are not compiled in when required vector instructions
> + * are not supported on a target architecture.
> + * The following null stubs are needed for linkage when those are not included
> + * outside of this file (e.g. mlx5_rxtx_vec_sse.c for x86).
> + */
> +
> +__rte_weak uint16_t
> +mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
> +		  struct rte_mbuf **pkts __rte_unused,
> +		  uint16_t pkts_n __rte_unused)
> +{
> +	return 0;
> +}
> +
> +__rte_weak uint16_t
> +mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused,
> +		       struct rte_mbuf **pkts __rte_unused,
> +		       uint16_t pkts_n __rte_unused)
> +{
> +	return 0;
> +}
> +
> +__rte_weak int
> +mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
> +{
> +	return -ENOTSUP;
> +}
> +
> +__rte_weak int
> +mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
> +{
> +	return -ENOTSUP;
> +}
> +
> diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
> index 83b1f38..d5a2de8 100644
> --- a/drivers/net/mlx5/mlx5_rx.h
> +++ b/drivers/net/mlx5/mlx5_rx.h
> @@ -246,7 +246,7 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
>  		     uint64_t hash_fields,
>  		     const uint16_t *queues, uint32_t queues_n);
> 
> -/* mlx5_rxtx.c */
> +/* mlx5_rx.c */
> 
>  uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
>  void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
> @@ -264,7 +264,7 @@ void mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>  int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>  			   struct rte_eth_burst_mode *mode);
> 
> -/* Vectorized version of mlx5_rxtx.c */
> +/* Vectorized version of mlx5_rx.c */
>  int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
>  int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
>  uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts,
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index d004e1e..c7f2605 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -130,37 +130,6 @@ enum mlx5_txcmp_code {
>  static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE,
>  		"invalid WQE size");
> 
> -static __rte_always_inline uint32_t
> -rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> -				   volatile struct mlx5_mini_cqe8 *mcqe);
> -
> -static __rte_always_inline int
> -mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> -		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
> -
> -static __rte_always_inline uint32_t
> -rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
> -
> -static __rte_always_inline void
> -rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
> -	       volatile struct mlx5_cqe *cqe,
> -	       volatile struct mlx5_mini_cqe8 *mcqe);
> -
> -static int
> -mlx5_queue_state_modify(struct rte_eth_dev *dev,
> -			struct mlx5_mp_arg_queue_state_modify *sm);
> -
> -static inline void
> -mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
> -			volatile struct mlx5_cqe *__rte_restrict cqe,
> -			uint32_t phcsum, uint8_t l4_type);
> -
> -static inline void
> -mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
> -		    volatile struct mlx5_cqe *__rte_restrict cqe,
> -			volatile struct mlx5_mini_cqe8 *mcqe,
> -		    struct mlx5_rxq_data *rxq, uint32_t len);
> -
>  uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
>  	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
>  };
> @@ -499,220 +468,6 @@ enum mlx5_txcmp_code {
>  	return mlx5_cksum_table[idx];
>  }
> 
> -/**
> - * Internal function to compute the number of used descriptors in an RX queue
> - *
> - * @param rxq
> - *   The Rx queue.
> - *
> - * @return
> - *   The number of used rx descriptor.
> - */
> -static uint32_t
> -rx_queue_count(struct mlx5_rxq_data *rxq)
> -{
> -	struct rxq_zip *zip = &rxq->zip;
> -	volatile struct mlx5_cqe *cqe;
> -	const unsigned int cqe_n = (1 << rxq->cqe_n);
> -	const unsigned int sges_n = (1 << rxq->sges_n);
> -	const unsigned int elts_n = (1 << rxq->elts_n);
> -	const unsigned int strd_n = (1 << rxq->strd_num_n);
> -	const unsigned int cqe_cnt = cqe_n - 1;
> -	unsigned int cq_ci, used;
> -
> -	/* if we are processing a compressed cqe */
> -	if (zip->ai) {
> -		used = zip->cqe_cnt - zip->ai;
> -		cq_ci = zip->cq_ci;
> -	} else {
> -		used = 0;
> -		cq_ci = rxq->cq_ci;
> -	}
> -	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
> -	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
> -		int8_t op_own;
> -		unsigned int n;
> -
> -		op_own = cqe->op_own;
> -		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
> -			n = rte_be_to_cpu_32(cqe->byte_cnt);
> -		else
> -			n = 1;
> -		cq_ci += n;
> -		used += n;
> -		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
> -	}
> -	used = RTE_MIN(used * sges_n, elts_n * strd_n);
> -	return used;
> -}
> -
> -/**
> - * DPDK callback to check the status of a rx descriptor.
> - *
> - * @param rx_queue
> - *   The Rx queue.
> - * @param[in] offset
> - *   The index of the descriptor in the ring.
> - *
> - * @return
> - *   The status of the tx descriptor.
> - */
> -int
> -mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
> -{
> -	struct mlx5_rxq_data *rxq = rx_queue;
> -	struct mlx5_rxq_ctrl *rxq_ctrl =
> -			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> -	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
> -
> -	if (dev->rx_pkt_burst == NULL ||
> -	    dev->rx_pkt_burst == removed_rx_burst) {
> -		rte_errno = ENOTSUP;
> -		return -rte_errno;
> -	}
> -	if (offset >= (1 << rxq->cqe_n)) {
> -		rte_errno = EINVAL;
> -		return -rte_errno;
> -	}
> -	if (offset < rx_queue_count(rxq))
> -		return RTE_ETH_RX_DESC_DONE;
> -	return RTE_ETH_RX_DESC_AVAIL;
> -}
> -
> -/**
> - * DPDK callback to get the RX queue information
> - *
> - * @param dev
> - *   Pointer to the device structure.
> - *
> - * @param rx_queue_id
> - *   Rx queue identificator.
> - *
> - * @param qinfo
> - *   Pointer to the RX queue information structure.
> - *
> - * @return
> - *   None.
> - */
> -
> -void
> -mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> -		  struct rte_eth_rxq_info *qinfo)
> -{
> -	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
> -	struct mlx5_rxq_ctrl *rxq_ctrl =
> -		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> -
> -	if (!rxq)
> -		return;
> -	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
> -					rxq->mprq_mp : rxq->mp;
> -	qinfo->conf.rx_thresh.pthresh = 0;
> -	qinfo->conf.rx_thresh.hthresh = 0;
> -	qinfo->conf.rx_thresh.wthresh = 0;
> -	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
> -	qinfo->conf.rx_drop_en = 1;
> -	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
> -	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
> -	qinfo->scattered_rx = dev->data->scattered_rx;
> -	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
> -		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
> -		(1 << rxq->elts_n);
> -}
> -
> -/**
> - * DPDK callback to get the RX packet burst mode information
> - *
> - * @param dev
> - *   Pointer to the device structure.
> - *
> - * @param rx_queue_id
> - *   Rx queue identificatior.
> - *
> - * @param mode
> - *   Pointer to the burts mode information.
> - *
> - * @return
> - *   0 as success, -EINVAL as failure.
> - */
> -
> -int
> -mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
> -		       uint16_t rx_queue_id __rte_unused,
> -		       struct rte_eth_burst_mode *mode)
> -{
> -	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
> -	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_rxq_data *rxq;
> -
> -	rxq = (*priv->rxqs)[rx_queue_id];
> -	if (!rxq) {
> -		rte_errno = EINVAL;
> -		return -rte_errno;
> -	}
> -	if (pkt_burst == mlx5_rx_burst) {
> -		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
> -	} else if (pkt_burst == mlx5_rx_burst_mprq) {
> -		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
> -	} else if (pkt_burst == mlx5_rx_burst_vec) {
> -#if defined RTE_ARCH_X86_64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
> -#elif defined RTE_ARCH_ARM64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
> -#elif defined RTE_ARCH_PPC_64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
> -#else
> -		return -EINVAL;
> -#endif
> -	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
> -#if defined RTE_ARCH_X86_64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
> -#elif defined RTE_ARCH_ARM64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
> -#elif defined RTE_ARCH_PPC_64
> -		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
> -#else
> -		return -EINVAL;
> -#endif
> -	} else {
> -		return -EINVAL;
> -	}
> -	return 0;
> -}
> -
> -/**
> - * DPDK callback to get the number of used descriptors in a RX queue
> - *
> - * @param dev
> - *   Pointer to the device structure.
> - *
> - * @param rx_queue_id
> - *   The Rx queue.
> - *
> - * @return
> - *   The number of used rx descriptor.
> - *   -EINVAL if the queue is invalid
> - */
> -uint32_t
> -mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
> -{
> -	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_rxq_data *rxq;
> -
> -	if (dev->rx_pkt_burst == NULL ||
> -	    dev->rx_pkt_burst == removed_rx_burst) {
> -		rte_errno = ENOTSUP;
> -		return -rte_errno;
> -	}
> -	rxq = (*priv->rxqs)[rx_queue_id];
> -	if (!rxq) {
> -		rte_errno = EINVAL;
> -		return -rte_errno;
> -	}
> -	return rx_queue_count(rxq);
> -}
> -
>  #define MLX5_SYSTEM_LOG_DIR "/var/log"
>  /**
>   * Dump debug information to log file.
> @@ -863,101 +618,6 @@ enum mlx5_txcmp_code {
>  }
> 
>  /**
> - * Translate RX completion flags to packet type.
> - *
> - * @param[in] rxq
> - *   Pointer to RX queue structure.
> - * @param[in] cqe
> - *   Pointer to CQE.
> - *
> - * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
> - *
> - * @return
> - *   Packet type for struct rte_mbuf.
> - */
> -static inline uint32_t
> -rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> -				   volatile struct mlx5_mini_cqe8 *mcqe)
> -{
> -	uint8_t idx;
> -	uint8_t ptype;
> -	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
> -
> -	/* Get l3/l4 header from mini-CQE in case L3/L4 format*/
> -	if (mcqe == NULL ||
> -	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> -		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
> -	else
> -		ptype = mcqe->hdr_type >> 2;
> -	/*
> -	 * The index to the array should have:
> -	 * bit[1:0] = l3_hdr_type
> -	 * bit[4:2] = l4_hdr_type
> -	 * bit[5] = ip_frag
> -	 * bit[6] = tunneled
> -	 * bit[7] = outer_l3_type
> -	 */
> -	idx = pinfo | ptype;
> -	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
> -}
> -
> -/**
> - * Initialize Rx WQ and indexes.
> - *
> - * @param[in] rxq
> - *   Pointer to RX queue structure.
> - */
> -void
> -mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
> -{
> -	const unsigned int wqe_n = 1 << rxq->elts_n;
> -	unsigned int i;
> -
> -	for (i = 0; (i != wqe_n); ++i) {
> -		volatile struct mlx5_wqe_data_seg *scat;
> -		uintptr_t addr;
> -		uint32_t byte_count;
> -
> -		if (mlx5_rxq_mprq_enabled(rxq)) {
> -			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
> -
> -			scat = &((volatile struct mlx5_wqe_mprq *)
> -				rxq->wqes)[i].dseg;
> -			addr = (uintptr_t)mlx5_mprq_buf_addr(buf,
> -							 1 << rxq->strd_num_n);
> -			byte_count = (1 << rxq->strd_sz_n) *
> -					(1 << rxq->strd_num_n);
> -		} else {
> -			struct rte_mbuf *buf = (*rxq->elts)[i];
> -
> -			scat = &((volatile struct mlx5_wqe_data_seg *)
> -					rxq->wqes)[i];
> -			addr = rte_pktmbuf_mtod(buf, uintptr_t);
> -			byte_count = DATA_LEN(buf);
> -		}
> -		/* scat->addr must be able to store a pointer. */
> -		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
> -		*scat = (struct mlx5_wqe_data_seg){
> -			.addr = rte_cpu_to_be_64(addr),
> -			.byte_count = rte_cpu_to_be_32(byte_count),
> -			.lkey = mlx5_rx_addr2mr(rxq, addr),
> -		};
> -	}
> -	rxq->consumed_strd = 0;
> -	rxq->decompressed = 0;
> -	rxq->rq_pi = 0;
> -	rxq->zip = (struct rxq_zip){
> -		.ai = 0,
> -	};
> -	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
> -		(wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
> -	/* Update doorbell counter. */
> -	rxq->rq_ci = wqe_n >> rxq->sges_n;
> -	rte_io_wmb();
> -	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> -}
> -
> -/**
>   * Modify a Verbs/DevX queue state.
>   * This must be called from the primary process.
>   *
> @@ -1013,7 +673,7 @@ enum mlx5_txcmp_code {
>   * @return
>   *   0 in case of success else non-zero value.
>   */
> -static int
> +int
>  mlx5_queue_state_modify(struct rte_eth_dev *dev,
>  			struct mlx5_mp_arg_queue_state_modify *sm)
>  {
> @@ -1034,783 +694,6 @@ enum mlx5_txcmp_code {
>  }
> 
>  /**
> - * Handle a Rx error.
> - * The function inserts the RQ state to reset when the first error CQE is
> - * shown, then drains the CQ by the caller function loop. When the CQ is empty,
> - * it moves the RQ state to ready and initializes the RQ.
> - * Next CQE identification and error counting are in the caller responsibility.
> - *
> - * @param[in] rxq
> - *   Pointer to RX queue structure.
> - * @param[in] vec
> - *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
> - *   0 when called from non-vectorized Rx burst.
> - *
> - * @return
> - *   -1 in case of recovery error, otherwise the CQE status.
> - */
> -int
> -mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
> -{
> -	const uint16_t cqe_n = 1 << rxq->cqe_n;
> -	const uint16_t cqe_mask = cqe_n - 1;
> -	const uint16_t wqe_n = 1 << rxq->elts_n;
> -	const uint16_t strd_n = 1 << rxq->strd_num_n;
> -	struct mlx5_rxq_ctrl *rxq_ctrl =
> -			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> -	union {
> -		volatile struct mlx5_cqe *cqe;
> -		volatile struct mlx5_err_cqe *err_cqe;
> -	} u = {
> -		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
> -	};
> -	struct mlx5_mp_arg_queue_state_modify sm;
> -	int ret;
> -
> -	switch (rxq->err_state) {
> -	case MLX5_RXQ_ERR_STATE_NO_ERROR:
> -		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
> -		/* Fall-through */
> -	case MLX5_RXQ_ERR_STATE_NEED_RESET:
> -		sm.is_wq = 1;
> -		sm.queue_id = rxq->idx;
> -		sm.state = IBV_WQS_RESET;
> -		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
> -			return -1;
> -		if (rxq_ctrl->dump_file_n <
> -		    rxq_ctrl->priv->config.max_dump_files_num) {
> -			MKSTR(err_str, "Unexpected CQE error syndrome "
> -			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
> -			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
> -			      rxq->cqn, rxq_ctrl->wqn,
> -			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
> -			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
> -			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
> -			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
> -			mlx5_dump_debug_information(name, NULL, err_str, 0);
> -			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
> -						    (const void *)((uintptr_t)
> -								    rxq->cqes),
> -						    sizeof(*u.cqe) * cqe_n);
> -			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
> -						    (const void *)((uintptr_t)
> -								    rxq->wqes),
> -						    16 * wqe_n);
> -			rxq_ctrl->dump_file_n++;
> -		}
> -		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
> -		/* Fall-through */
> -	case MLX5_RXQ_ERR_STATE_NEED_READY:
> -		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
> -		if (ret == MLX5_CQE_STATUS_HW_OWN) {
> -			rte_io_wmb();
> -			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> -			rte_io_wmb();
> -			/*
> -			 * The RQ consumer index must be zeroed while moving
> -			 * from RESET state to RDY state.
> -			 */
> -			*rxq->rq_db = rte_cpu_to_be_32(0);
> -			rte_io_wmb();
> -			sm.is_wq = 1;
> -			sm.queue_id = rxq->idx;
> -			sm.state = IBV_WQS_RDY;
> -			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
> -						    &sm))
> -				return -1;
> -			if (vec) {
> -				const uint32_t elts_n =
> -					mlx5_rxq_mprq_enabled(rxq) ?
> -					wqe_n * strd_n : wqe_n;
> -				const uint32_t e_mask = elts_n - 1;
> -				uint32_t elts_ci =
> -					mlx5_rxq_mprq_enabled(rxq) ?
> -					rxq->elts_ci : rxq->rq_ci;
> -				uint32_t elt_idx;
> -				struct rte_mbuf **elt;
> -				int i;
> -				unsigned int n = elts_n - (elts_ci -
> -							  rxq->rq_pi);
> -
> -				for (i = 0; i < (int)n; ++i) {
> -					elt_idx = (elts_ci + i) & e_mask;
> -					elt = &(*rxq->elts)[elt_idx];
> -					*elt = rte_mbuf_raw_alloc(rxq->mp);
> -					if (!*elt) {
> -						for (i--; i >= 0; --i) {
> -							elt_idx = (elts_ci +
> -								   i) & elts_n;
> -							elt = &(*rxq->elts)
> -								[elt_idx];
> -							rte_pktmbuf_free_seg
> -								(*elt);
> -						}
> -						return -1;
> -					}
> -				}
> -				for (i = 0; i < (int)elts_n; ++i) {
> -					elt = &(*rxq->elts)[i];
> -					DATA_LEN(*elt) =
> -						(uint16_t)((*elt)->buf_len -
> -						rte_pktmbuf_headroom(*elt));
> -				}
> -				/* Padding with a fake mbuf for vec Rx. */
> -				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
> -					(*rxq->elts)[elts_n + i] =
> -								&rxq->fake_mbuf;
> -			}
> -			mlx5_rxq_initialize(rxq);
> -			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
> -		}
> -		return ret;
> -	default:
> -		return -1;
> -	}
> -}
> -
> -/**
> - * Get size of the next packet for a given CQE. For compressed CQEs, the
> - * consumer index is updated only once all packets of the current one have
> - * been processed.
> - *
> - * @param rxq
> - *   Pointer to RX queue.
> - * @param cqe
> - *   CQE to process.
> - * @param[out] mcqe
> - *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
> - *   written.
> - *
> - * @return
> - *   0 in case of empty CQE, otherwise the packet size in bytes.
> - */
> -static inline int
> -mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> -		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
> -{
> -	struct rxq_zip *zip = &rxq->zip;
> -	uint16_t cqe_n = cqe_cnt + 1;
> -	int len;
> -	uint16_t idx, end;
> -
> -	do {
> -		len = 0;
> -		/* Process compressed data in the CQE and mini arrays. */
> -		if (zip->ai) {
> -			volatile struct mlx5_mini_cqe8 (*mc)[8] =
> -				(volatile struct mlx5_mini_cqe8 (*)[8])
> -				(uintptr_t)(&(*rxq->cqes)[zip->ca &
> -							  cqe_cnt].pkt_info);
> -			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
> -					       rxq->byte_mask);
> -			*mcqe = &(*mc)[zip->ai & 7];
> -			if ((++zip->ai & 7) == 0) {
> -				/* Invalidate consumed CQEs */
> -				idx = zip->ca;
> -				end = zip->na;
> -				while (idx != end) {
> -					(*rxq->cqes)[idx & cqe_cnt].op_own =
> -						MLX5_CQE_INVALIDATE;
> -					++idx;
> -				}
> -				/*
> -				 * Increment consumer index to skip the number
> -				 * of CQEs consumed. Hardware leaves holes in
> -				 * the CQ ring for software use.
> -				 */
> -				zip->ca = zip->na;
> -				zip->na += 8;
> -			}
> -			if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
> -				/* Invalidate the rest */
> -				idx = zip->ca;
> -				end = zip->cq_ci;
> -
> -				while (idx != end) {
> -					(*rxq->cqes)[idx & cqe_cnt].op_own =
> -						MLX5_CQE_INVALIDATE;
> -					++idx;
> -				}
> -				rxq->cq_ci = zip->cq_ci;
> -				zip->ai = 0;
> -			}
> -		/*
> -		 * No compressed data, get next CQE and verify if it is
> -		 * compressed.
> -		 */
> -		} else {
> -			int ret;
> -			int8_t op_own;
> -			uint32_t cq_ci;
> -
> -			ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
> -			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
> -				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
> -					     rxq->err_state)) {
> -					ret = mlx5_rx_err_handle(rxq, 0);
> -					if (ret == MLX5_CQE_STATUS_HW_OWN ||
> -					    ret == -1)
> -						return 0;
> -				} else {
> -					return 0;
> -				}
> -			}
> -			/*
> -			 * Introduce the local variable to have queue cq_ci
> -			 * index in queue structure always consistent with
> -			 * actual CQE boundary (not pointing to the middle
> -			 * of compressed CQE session).
> -			 */
> -			cq_ci = rxq->cq_ci + 1;
> -			op_own = cqe->op_own;
> -			if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
> -				volatile struct mlx5_mini_cqe8 (*mc)[8] =
> -					(volatile struct mlx5_mini_cqe8 (*)[8])
> -					(uintptr_t)(&(*rxq->cqes)
> -						[cq_ci & cqe_cnt].pkt_info);
> -
> -				/* Fix endianness. */
> -				zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
> -				/*
> -				 * Current mini array position is the one
> -				 * returned by check_cqe64().
> -				 *
> -				 * If completion comprises several mini arrays,
> -				 * as a special case the second one is located
> -				 * 7 CQEs after the initial CQE instead of 8
> -				 * for subsequent ones.
> -				 */
> -				zip->ca = cq_ci;
> -				zip->na = zip->ca + 7;
> -				/* Compute the next non compressed CQE. */
> -				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
> -				/* Get packet size to return. */
> -				len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
> -						       rxq->byte_mask);
> -				*mcqe = &(*mc)[0];
> -				zip->ai = 1;
> -				/* Prefetch all to be invalidated */
> -				idx = zip->ca;
> -				end = zip->cq_ci;
> -				while (idx != end) {
> -					rte_prefetch0(&(*rxq->cqes)[(idx) &
> -								    cqe_cnt]);
> -					++idx;
> -				}
> -			} else {
> -				rxq->cq_ci = cq_ci;
> -				len = rte_be_to_cpu_32(cqe->byte_cnt);
> -			}
> -		}
> -		if (unlikely(rxq->err_state)) {
> -			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> -			++rxq->stats.idropped;
> -		} else {
> -			return len;
> -		}
> -	} while (1);
> -}
> -
> -/**
> - * Translate RX completion flags to offload flags.
> - *
> - * @param[in] cqe
> - *   Pointer to CQE.
> - *
> - * @return
> - *   Offload flags (ol_flags) for struct rte_mbuf.
> - */
> -static inline uint32_t
> -rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
> -{
> -	uint32_t ol_flags = 0;
> -	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
> -
> -	ol_flags =
> -		TRANSPOSE(flags,
> -			  MLX5_CQE_RX_L3_HDR_VALID,
> -			  PKT_RX_IP_CKSUM_GOOD) |
> -		TRANSPOSE(flags,
> -			  MLX5_CQE_RX_L4_HDR_VALID,
> -			  PKT_RX_L4_CKSUM_GOOD);
> -	return ol_flags;
> -}
> -
> -/**
> - * Fill in mbuf fields from RX completion flags.
> - * Note that pkt->ol_flags should be initialized outside of this function.
> - *
> - * @param rxq
> - *   Pointer to RX queue.
> - * @param pkt
> - *   mbuf to fill.
> - * @param cqe
> - *   CQE to process.
> - * @param rss_hash_res
> - *   Packet RSS Hash result.
> - */
> -static inline void
> -rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
> -	       volatile struct mlx5_cqe *cqe,
> -	       volatile struct mlx5_mini_cqe8 *mcqe)
> -{
> -	/* Update packet information. */
> -	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);
> -
> -	if (rxq->rss_hash) {
> -		uint32_t rss_hash_res = 0;
> -
> -		/* If compressed, take hash result from mini-CQE. */
> -		if (mcqe == NULL ||
> -		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
> -			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
> -		else
> -			rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
> -		if (rss_hash_res) {
> -			pkt->hash.rss = rss_hash_res;
> -			pkt->ol_flags |= PKT_RX_RSS_HASH;
> -		}
> -	}
> -	if (rxq->mark) {
> -		uint32_t mark = 0;
> -
> -		/* If compressed, take flow tag from mini-CQE. */
> -		if (mcqe == NULL ||
> -		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
> -			mark = cqe->sop_drop_qpn;
> -		else
> -			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
> -				(mcqe->flow_tag_high << 16);
> -		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
> -			pkt->ol_flags |= PKT_RX_FDIR;
> -			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -				pkt->ol_flags |= PKT_RX_FDIR_ID;
> -				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
> -			}
> -		}
> -	}
> -	if (rxq->dynf_meta) {
> -		uint32_t meta = cqe->flow_table_metadata &
> -				rxq->flow_meta_port_mask;
> -
> -		if (meta) {
> -			pkt->ol_flags |= rxq->flow_meta_mask;
> -			*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
> -						uint32_t *) = meta;
> -		}
> -	}
> -	if (rxq->csum)
> -		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
> -	if (rxq->vlan_strip) {
> -		bool vlan_strip;
> -
> -		if (mcqe == NULL ||
> -		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> -			vlan_strip = cqe->hdr_type_etc &
> -				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
> -		else
> -			vlan_strip = mcqe->hdr_type &
> -				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
> -		if (vlan_strip) {
> -			pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
> -			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
> -		}
> -	}
> -	if (rxq->hw_timestamp) {
> -		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
> -
> -		if (rxq->rt_timestamp)
> -			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
> -		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
> -		pkt->ol_flags |= rxq->timestamp_rx_flag;
> -	}
> -}
> -
> -/**
> - * DPDK callback for RX.
> - *
> - * @param dpdk_rxq
> - *   Generic pointer to RX queue structure.
> - * @param[out] pkts
> - *   Array to store received packets.
> - * @param pkts_n
> - *   Maximum number of packets in array.
> - *
> - * @return
> - *   Number of packets successfully received (<= pkts_n).
> - */
> -uint16_t
> -mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
> -{
> -	struct mlx5_rxq_data *rxq = dpdk_rxq;
> -	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
> -	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
> -	const unsigned int sges_n = rxq->sges_n;
> -	struct rte_mbuf *pkt = NULL;
> -	struct rte_mbuf *seg = NULL;
> -	volatile struct mlx5_cqe *cqe =
> -		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> -	unsigned int i = 0;
> -	unsigned int rq_ci = rxq->rq_ci << sges_n;
> -	int len = 0; /* keep its value across iterations. */
> -
> -	while (pkts_n) {
> -		unsigned int idx = rq_ci & wqe_cnt;
> -		volatile struct mlx5_wqe_data_seg *wqe =
> -			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
> -		struct rte_mbuf *rep = (*rxq->elts)[idx];
> -		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
> -
> -		if (pkt)
> -			NEXT(seg) = rep;
> -		seg = rep;
> -		rte_prefetch0(seg);
> -		rte_prefetch0(cqe);
> -		rte_prefetch0(wqe);
> -		/* Allocate the buf from the same pool. */
> -		rep = rte_mbuf_raw_alloc(seg->pool);
> -		if (unlikely(rep == NULL)) {
> -			++rxq->stats.rx_nombuf;
> -			if (!pkt) {
> -				/*
> -				 * no buffers before we even started,
> -				 * bail out silently.
> -				 */
> -				break;
> -			}
> -			while (pkt != seg) {
> -				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
> -				rep = NEXT(pkt);
> -				NEXT(pkt) = NULL;
> -				NB_SEGS(pkt) = 1;
> -				rte_mbuf_raw_free(pkt);
> -				pkt = rep;
> -			}
> -			rq_ci >>= sges_n;
> -			++rq_ci;
> -			rq_ci <<= sges_n;
> -			break;
> -		}
> -		if (!pkt) {
> -			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
> -			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
> -			if (!len) {
> -				rte_mbuf_raw_free(rep);
> -				break;
> -			}
> -			pkt = seg;
> -			MLX5_ASSERT(len >= (rxq->crc_present << 2));
> -			pkt->ol_flags &= EXT_ATTACHED_MBUF;
> -			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
> -			if (rxq->crc_present)
> -				len -= RTE_ETHER_CRC_LEN;
> -			PKT_LEN(pkt) = len;
> -			if (cqe->lro_num_seg > 1) {
> -				mlx5_lro_update_hdr
> -					(rte_pktmbuf_mtod(pkt, uint8_t *),
> cqe,
> -					 mcqe, rxq, len);
> -				pkt->ol_flags |= PKT_RX_LRO;
> -				pkt->tso_segsz = len / cqe->lro_num_seg;
> -			}
> -		}
> -		DATA_LEN(rep) = DATA_LEN(seg);
> -		PKT_LEN(rep) = PKT_LEN(seg);
> -		SET_DATA_OFF(rep, DATA_OFF(seg));
> -		PORT(rep) = PORT(seg);
> -		(*rxq->elts)[idx] = rep;
> -		/*
> -		 * Fill NIC descriptor with the new buffer.  The lkey and size
> -		 * of the buffers are already known, only the buffer address
> -		 * changes.
> -		 */
> -		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep,
> uintptr_t));
> -		/* If there's only one MR, no need to replace LKey in WQE. */
> -		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) >
> 1))
> -			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
> -		if (len > DATA_LEN(seg)) {
> -			len -= DATA_LEN(seg);
> -			++NB_SEGS(pkt);
> -			++rq_ci;
> -			continue;
> -		}
> -		DATA_LEN(seg) = len;
> -#ifdef MLX5_PMD_SOFT_COUNTERS
> -		/* Increment bytes counter. */
> -		rxq->stats.ibytes += PKT_LEN(pkt);
> -#endif
> -		/* Return packet. */
> -		*(pkts++) = pkt;
> -		pkt = NULL;
> -		--pkts_n;
> -		++i;
> -		/* Align consumer index to the next stride. */
> -		rq_ci >>= sges_n;
> -		++rq_ci;
> -		rq_ci <<= sges_n;
> -	}
> -	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
> -		return 0;
> -	/* Update the consumer index. */
> -	rxq->rq_ci = rq_ci >> sges_n;
> -	rte_io_wmb();
> -	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> -	rte_io_wmb();
> -	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> -#ifdef MLX5_PMD_SOFT_COUNTERS
> -	/* Increment packets counter. */
> -	rxq->stats.ipackets += i;
> -#endif
> -	return i;
> -}
> -
> -/**
> - * Update LRO packet TCP header.
> - * The HW LRO feature doesn't update the TCP header after coalescing the
> - * TCP segments but supplies information in CQE to fill it by SW.
> - *
> - * @param tcp
> - *   Pointer to the TCP header.
> - * @param cqe
> - *   Pointer to the completion entry.
> - * @param phcsum
> - *   The L3 pseudo-header checksum.
> - */
> -static inline void
> -mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
> -			volatile struct mlx5_cqe *__rte_restrict cqe,
> -			uint32_t phcsum, uint8_t l4_type)
> -{
> -	/*
> -	 * The HW calculates only the TCP payload checksum, need to
> complete
> -	 * the TCP header checksum and the L3 pseudo-header checksum.
> -	 */
> -	uint32_t csum = phcsum + cqe->csum;
> -
> -	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
> -	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
> -		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
> -		tcp->recv_ack = cqe->lro_ack_seq_num;
> -		tcp->rx_win = cqe->lro_tcp_win;
> -	}
> -	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
> -		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
> -	tcp->cksum = 0;
> -	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
> -	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
> -	csum = (~csum) & 0xffff;
> -	if (csum == 0)
> -		csum = 0xffff;
> -	tcp->cksum = csum;
> -}
> -
> -/**
> - * Update LRO packet headers.
> - * The HW LRO feature doesn't update the L3/TCP headers after coalescing
> the
> - * TCP segments but supplies information in CQE to fill it by SW.
> - *
> - * @param padd
> - *   The packet address.
> - * @param cqe
> - *   Pointer to the completion entry.
> - * @param len
> - *   The packet length.
> - */
> -static inline void
> -mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
> -		    volatile struct mlx5_cqe *__rte_restrict cqe,
> -		    volatile struct mlx5_mini_cqe8 *mcqe,
> -		    struct mlx5_rxq_data *rxq, uint32_t len)
> -{
> -	union {
> -		struct rte_ether_hdr *eth;
> -		struct rte_vlan_hdr *vlan;
> -		struct rte_ipv4_hdr *ipv4;
> -		struct rte_ipv6_hdr *ipv6;
> -		struct rte_tcp_hdr *tcp;
> -		uint8_t *hdr;
> -	} h = {
> -			.hdr = padd,
> -	};
> -	uint16_t proto = h.eth->ether_type;
> -	uint32_t phcsum;
> -	uint8_t l4_type;
> -
> -	h.eth++;
> -	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
> -	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
> -		proto = h.vlan->eth_proto;
> -		h.vlan++;
> -	}
> -	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
> -		h.ipv4->time_to_live = cqe->lro_min_ttl;
> -		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr -
> padd));
> -		h.ipv4->hdr_checksum = 0;
> -		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
> -		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
> -		h.ipv4++;
> -	} else {
> -		h.ipv6->hop_limits = cqe->lro_min_ttl;
> -		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd)
> -
> -						       sizeof(*h.ipv6));
> -		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
> -		h.ipv6++;
> -	}
> -	if (mcqe == NULL ||
> -	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
> -		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
> -			   MLX5_CQE_L4_TYPE_MASK) >>
> MLX5_CQE_L4_TYPE_SHIFT;
> -	else
> -		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
> -			   MLX5_CQE_L4_TYPE_MASK) >>
> MLX5_CQE_L4_TYPE_SHIFT;
> -	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
> -}
> -
> -void
> -mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
> -{
> -	struct mlx5_mprq_buf *buf = opaque;
> -
> -	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
> -		rte_mempool_put(buf->mp, buf);
> -	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
> -					       __ATOMIC_RELAXED) == 0)) {
> -		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
> -		rte_mempool_put(buf->mp, buf);
> -	}
> -}
> -
> -void
> -mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
> -{
> -	mlx5_mprq_buf_free_cb(NULL, buf);
> -}
> -
> -/**
> - * DPDK callback for RX with Multi-Packet RQ support.
> - *
> - * @param dpdk_rxq
> - *   Generic pointer to RX queue structure.
> - * @param[out] pkts
> - *   Array to store received packets.
> - * @param pkts_n
> - *   Maximum number of packets in array.
> - *
> - * @return
> - *   Number of packets successfully received (<= pkts_n).
> - */
> -uint16_t
> -mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t
> pkts_n)
> -{
> -	struct mlx5_rxq_data *rxq = dpdk_rxq;
> -	const uint32_t strd_n = 1 << rxq->strd_num_n;
> -	const uint32_t strd_sz = 1 << rxq->strd_sz_n;
> -	const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
> -	const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
> -	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci &
> cq_mask];
> -	unsigned int i = 0;
> -	uint32_t rq_ci = rxq->rq_ci;
> -	uint16_t consumed_strd = rxq->consumed_strd;
> -	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> -
> -	while (i < pkts_n) {
> -		struct rte_mbuf *pkt;
> -		int ret;
> -		uint32_t len;
> -		uint16_t strd_cnt;
> -		uint16_t strd_idx;
> -		uint32_t byte_cnt;
> -		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
> -		enum mlx5_rqx_code rxq_code;
> -
> -		if (consumed_strd == strd_n) {
> -			/* Replace WQE if the buffer is still in use. */
> -			mprq_buf_replace(rxq, rq_ci & wq_mask);
> -			/* Advance to the next WQE. */
> -			consumed_strd = 0;
> -			++rq_ci;
> -			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> -		}
> -		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
> -		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
> -		if (!ret)
> -			break;
> -		byte_cnt = ret;
> -		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >>
> MLX5_MPRQ_LEN_SHIFT;
> -		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
> -		if (rxq->crc_present)
> -			len -= RTE_ETHER_CRC_LEN;
> -		if (mcqe &&
> -		    rxq->mcqe_format ==
> MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
> -			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
> -		else
> -			strd_cnt = (byte_cnt &
> MLX5_MPRQ_STRIDE_NUM_MASK) >>
> -				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
> -		MLX5_ASSERT(strd_cnt);
> -		consumed_strd += strd_cnt;
> -		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
> -			continue;
> -		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
> -					cqe->wqe_counter :
> -					mcqe->stride_idx);
> -		MLX5_ASSERT(strd_idx < strd_n);
> -		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci)
> &
> -			    wq_mask));
> -		pkt = rte_pktmbuf_alloc(rxq->mp);
> -		if (unlikely(pkt == NULL)) {
> -			++rxq->stats.rx_nombuf;
> -			break;
> -		}
> -		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >>
> MLX5_MPRQ_LEN_SHIFT;
> -		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
> -		if (rxq->crc_present)
> -			len -= RTE_ETHER_CRC_LEN;
> -		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
> -					   strd_idx, strd_cnt);
> -		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
> -			rte_pktmbuf_free_seg(pkt);
> -			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
> -				++rxq->stats.idropped;
> -				continue;
> -			}
> -			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
> -				++rxq->stats.rx_nombuf;
> -				break;
> -			}
> -		}
> -		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
> -		if (cqe->lro_num_seg > 1) {
> -			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt,
> uint8_t *),
> -					    cqe, mcqe, rxq, len);
> -			pkt->ol_flags |= PKT_RX_LRO;
> -			pkt->tso_segsz = len / cqe->lro_num_seg;
> -		}
> -		PKT_LEN(pkt) = len;
> -		PORT(pkt) = rxq->port_id;
> -#ifdef MLX5_PMD_SOFT_COUNTERS
> -		/* Increment bytes counter. */
> -		rxq->stats.ibytes += PKT_LEN(pkt);
> -#endif
> -		/* Return packet. */
> -		*(pkts++) = pkt;
> -		++i;
> -	}
> -	/* Update the consumer indexes. */
> -	rxq->consumed_strd = consumed_strd;
> -	rte_io_wmb();
> -	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> -	if (rq_ci != rxq->rq_ci) {
> -		rxq->rq_ci = rq_ci;
> -		rte_io_wmb();
> -		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
> -	}
> -#ifdef MLX5_PMD_SOFT_COUNTERS
> -	/* Increment packets counter. */
> -	rxq->stats.ipackets += i;
> -#endif
> -	return i;
> -}
> -
> -/**
>   * Dummy DPDK callback for TX.
>   *
>   * This function is used to temporarily replace the real callback during
> @@ -1836,66 +719,6 @@ enum mlx5_txcmp_code {
>  }
> 
>  /**
> - * Dummy DPDK callback for RX.
> - *
> - * This function is used to temporarily replace the real callback during
> - * unsafe control operations on the queue, or in case of error.
> - *
> - * @param dpdk_rxq
> - *   Generic pointer to RX queue structure.
> - * @param[out] pkts
> - *   Array to store received packets.
> - * @param pkts_n
> - *   Maximum number of packets in array.
> - *
> - * @return
> - *   Number of packets successfully received (<= pkts_n).
> - */
> -uint16_t
> -removed_rx_burst(void *dpdk_txq __rte_unused,
> -		 struct rte_mbuf **pkts __rte_unused,
> -		 uint16_t pkts_n __rte_unused)
> -{
> -	rte_mb();
> -	return 0;
> -}
> -
> -/*
> - * Vectorized Rx/Tx routines are not compiled in when required vector
> - * instructions are not supported on a target architecture. The following null
> - * stubs are needed for linkage when those are not included outside of this
> file
> - * (e.g.  mlx5_rxtx_vec_sse.c for x86).
> - */
> -
> -__rte_weak uint16_t
> -mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
> -		  struct rte_mbuf **pkts __rte_unused,
> -		  uint16_t pkts_n __rte_unused)
> -{
> -	return 0;
> -}
> -
> -__rte_weak uint16_t
> -mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused,
> -		       struct rte_mbuf **pkts __rte_unused,
> -		       uint16_t pkts_n __rte_unused)
> -{
> -	return 0;
> -}
> -
> -__rte_weak int
> -mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
> -{
> -	return -ENOTSUP;
> -}
> -
> -__rte_weak int
> -mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
> -{
> -	return -ENOTSUP;
> -}
> -
> -/**
>   * Free the mbufs from the linear array of pointers.
>   *
>   * @param txq
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index d443db4..f1ebc99 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -196,6 +196,8 @@ void mlx5_dump_debug_information(const char
> *path, const char *title,
>  				 const void *buf, unsigned int len);
>  int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
>  			const struct mlx5_mp_arg_queue_state_modify
> *sm);
> +int mlx5_queue_state_modify(struct rte_eth_dev *dev,
> +			    struct mlx5_mp_arg_queue_state_modify *sm);
>  void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>  		       struct rte_eth_txq_info *qinfo);
>  int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> tx_queue_id,
> --
> 1.8.3.1


  reply	other threads:[~2021-04-06  9:28 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05 14:00 [dpdk-dev] [PATCH 0/6] net/mlx5: reduce Tx datapath compile time Michael Baum
2021-04-05 14:00 ` [dpdk-dev] [PATCH 1/6] net/mlx5: separate Rx function declarations to another file Michael Baum
2021-04-06  9:27   ` Slava Ovsiienko
2021-04-05 14:00 ` [dpdk-dev] [PATCH 2/6] net/mlx5: separate Rx function implementations to new file Michael Baum
2021-04-06  9:27   ` Slava Ovsiienko [this message]
2021-04-05 14:00 ` [dpdk-dev] [PATCH 3/6] net/mlx5: separate Tx function declarations to another file Michael Baum
2021-04-06  9:28   ` Slava Ovsiienko
2021-04-07 11:33   ` Raslan Darawsheh
2021-04-05 14:00 ` [dpdk-dev] [PATCH 4/6] net/mlx5: separate Tx burst template to header file Michael Baum
2021-04-06  9:28   ` Slava Ovsiienko
2021-04-05 14:00 ` [dpdk-dev] [PATCH 5/6] net/mlx5: separate Tx function implementations to new file Michael Baum
2021-04-06  9:29   ` Slava Ovsiienko
2021-04-05 14:00 ` [dpdk-dev] [PATCH 6/6] net/mlx5: separate Tx burst functions to different files Michael Baum
2021-04-06  9:30   ` Slava Ovsiienko
2021-04-06  9:33 ` [dpdk-dev] [PATCH 0/6] net/mlx5: reduce Tx datapath compile time David Marchand
2021-04-06  9:58   ` Slava Ovsiienko
2021-04-12  6:32 ` [dpdk-dev] [PATCH v2 " Michael Baum
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 1/6] net/mlx5: separate Rx function declarations to another file Michael Baum
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: separate Rx function implementations to new file Michael Baum
2021-04-16 13:08     ` Ferruh Yigit
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 3/6] net/mlx5: separate Tx function declarations to another file Michael Baum
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: separate Tx burst template to header file Michael Baum
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: separate Tx function implementations to new file Michael Baum
2021-04-12  6:32   ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: separate Tx burst functions to different files Michael Baum
2021-04-15  6:27   ` [dpdk-dev] [PATCH v2 0/6] net/mlx5: reduce Tx datapath compile time Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM6PR12MB3753DADA41BAB1D82696413CDF769@DM6PR12MB3753.namprd12.prod.outlook.com \
    --to=viacheslavo@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=michaelba@nvidia.com \
    --cc=rasland@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git