- * [dpdk-dev] [PATCH 01/12] net/bnxt: fix burst mode get for Arm
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information Lance Richardson
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur, Ruifeng Wang; +Cc: dev, stable
Transmit and receive burst mode get operations incorrectly return
"Vector SSE" on ARM64 platforms, change to return "Vector Neon"
instead.
Fixes: 3983583414 ("net/bnxt: support NEON")
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Cc: stable@dpdk.org
---
 drivers/net/bnxt/bnxt_ethdev.c | 60 +++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 22 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 75d055be00..7a77922c0c 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2615,46 +2615,62 @@ bnxt_txq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
 }
 
+static const struct {
+	eth_rx_burst_t pkt_burst;
+	const char *info;
+} bnxt_rx_burst_info[] = {
+	{bnxt_recv_pkts,	"Scalar"},
+#if defined(RTE_ARCH_X86)
+	{bnxt_recv_pkts_vec,	"Vector SSE"},
+#elif defined(RTE_ARCH_ARM64)
+	{bnxt_recv_pkts_vec,	"Vector Neon"},
+#endif
+};
+
 static int
 bnxt_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
 		       struct rte_eth_burst_mode *mode)
 {
 	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
+	size_t i;
 
-	if (pkt_burst == bnxt_recv_pkts) {
-		snprintf(mode->info, sizeof(mode->info), "%s",
-			 "Scalar");
-		return 0;
-	}
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (pkt_burst == bnxt_recv_pkts_vec) {
-		snprintf(mode->info, sizeof(mode->info), "%s",
-			 "Vector SSE");
-		return 0;
+	for (i = 0; i < RTE_DIM(bnxt_rx_burst_info); i++) {
+		if (pkt_burst == bnxt_rx_burst_info[i].pkt_burst) {
+			snprintf(mode->info, sizeof(mode->info), "%s",
+				 bnxt_rx_burst_info[i].info);
+			return 0;
+		}
 	}
-#endif
 
 	return -EINVAL;
 }
 
+static const struct {
+	eth_tx_burst_t pkt_burst;
+	const char *info;
+} bnxt_tx_burst_info[] = {
+	{bnxt_xmit_pkts,	"Scalar"},
+#if defined(RTE_ARCH_X86)
+	{bnxt_xmit_pkts_vec,	"Vector SSE"},
+#elif defined(RTE_ARCH_ARM64)
+	{bnxt_xmit_pkts_vec,	"Vector Neon"},
+#endif
+};
+
 static int
 bnxt_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
 		       struct rte_eth_burst_mode *mode)
 {
 	eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
+	size_t i;
 
-	if (pkt_burst == bnxt_xmit_pkts) {
-		snprintf(mode->info, sizeof(mode->info), "%s",
-			 "Scalar");
-		return 0;
-	}
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (pkt_burst == bnxt_xmit_pkts_vec) {
-		snprintf(mode->info, sizeof(mode->info), "%s",
-			 "Vector SSE");
-		return 0;
+	for (i = 0; i < RTE_DIM(bnxt_tx_burst_info); i++) {
+		if (pkt_burst == bnxt_tx_burst_info[i].pkt_burst) {
+			snprintf(mode->info, sizeof(mode->info), "%s",
+				 bnxt_tx_burst_info[i].info);
+			return 0;
+		}
 	}
-#endif
 
 	return -EINVAL;
 }
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 01/12] net/bnxt: fix burst mode get for Arm Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-11 14:41   ` Ferruh Yigit
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 03/12] net/bnxt: use appropriate type for Rx mbuf ring Lance Richardson
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev, stable
Return correct values for Rx/Tx offloads and for rx_drop_en.
Fixes: 2fc201884be8 ("net/bnxt: support rxq/txq get information")
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Cc: stable@dpdk.org
---
 drivers/net/bnxt/bnxt_ethdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 7a77922c0c..5585f872d0 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2588,8 +2588,9 @@ bnxt_rxq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
 	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
-	qinfo->conf.rx_drop_en = 0;
+	qinfo->conf.rx_drop_en = 1;
 	qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
+	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
 }
 
 static void
@@ -2613,6 +2614,7 @@ bnxt_txq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
 	qinfo->conf.tx_rs_thresh = 0;
 	qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
+	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
 }
 
 static const struct {
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information Lance Richardson
@ 2020-09-11 14:41   ` Ferruh Yigit
  2020-09-18 18:41     ` Lance Richardson
  0 siblings, 1 reply; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-11 14:41 UTC (permalink / raw)
  To: Lance Richardson, Ajit Khaparde, Somnath Kotur; +Cc: dev, stable
On 9/9/2020 4:52 PM, Lance Richardson wrote:
> Return correct values for Rx/Tx offloads and for rx_drop_en.
> 
> Fixes: 2fc201884be8 ("net/bnxt: support rxq/txq get information")
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Cc: stable@dpdk.org
> ---
>  drivers/net/bnxt/bnxt_ethdev.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 7a77922c0c..5585f872d0 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -2588,8 +2588,9 @@ bnxt_rxq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
>  	qinfo->nb_desc = rxq->nb_rx_desc;
>  
>  	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
> -	qinfo->conf.rx_drop_en = 0;
> +	qinfo->conf.rx_drop_en = 1;
Why 0 is wrong but 1 is correct?
Technically 'rx_drop_en' is a user configuration, which is set via
'rte_eth_rx_queue_setup()' API.
bnxt seems not honoring this config option at all.
Based on HW capability, I think two things can be done,
1) Configure the HW based on config request, and return configured value in
'bnxt_rxq_info_get_op()'. see 'ixgbe'.
2) If HW is not configurable, check the value in 'rte_eth_rx_queue_setup()'
a) return error if unsupported value requested. see 'sfc'.
b) log a warning and overwrite the requested config with whatever supported.
And for both a & b, return current config in the 'bnxt_rxq_info_get_op()'
>  	qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
> +	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
This is for queue specific offloads, you are returning port offloads.
As far as I can see bnxt doesn't have any queue specific offload, so this can be
dropped.
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information
  2020-09-11 14:41   ` Ferruh Yigit
@ 2020-09-18 18:41     ` Lance Richardson
  2020-09-21 11:05       ` Ferruh Yigit
  0 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-18 18:41 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ajit Khaparde, Somnath Kotur, dev, stable
On Fri, Sep 11, 2020 at 10:41 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> > -     qinfo->conf.rx_drop_en = 0;
> > +     qinfo->conf.rx_drop_en = 1;
>
> Why 0 is wrong but 1 is correct?
>
> Technically 'rx_drop_en' is a user configuration, which is set via
> 'rte_eth_rx_queue_setup()' API.
>
> bnxt seems not honoring this config option at all.
>
> Based on HW capability, I think two things can be done,
> 1) Configure the HW based on config request, and return configured value in
> 'bnxt_rxq_info_get_op()'. see 'ixgbe'.
>
> 2) If HW is not configurable, check the value in 'rte_eth_rx_queue_setup()'
> a) return error if unsupported value requested. see 'sfc'.
> b) log a warning and overwrite the requested config with whatever supported.
> And for both a & b, return current config in the 'bnxt_rxq_info_get_op()'
>
> >       qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
> > +     qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
>
Hi Ferruh,
Apologies, this somehow didn't make it to my inbox.
I believe case (2) applies here, rx_drop_en is not currently configurable in hw,
so this change was intended to accurately report the effective value. I'm not
sure whether (2a) or (2b) would be better, but (2b) seems less likely to cause
issues for existing applications.
>
> This is for queue specific offloads, you are returning port offloads.
> As far as I can see bnxt doesn't have any queue specific offload, so this can be
> dropped.
It wasn't clear to me whether this was intended to report the difference between
the offload configuration for the queue and the offload configuration
for the port
or the effective offload configuration for the queue. I noticed that
several other
PMDs (e.g. mlx5, netvsc, sfc) report the offload configuration for the port in
rx/tx_queue_info_get(). The sfc PMD reports the offload configuration for the
port combined with queue-specific offloads, based on those examples this
seemed to be correct. I guess you're saying those are also incorrect?
Thanks,
   Lance
^ permalink raw reply	[flat|nested] 22+ messages in thread 
- * Re: [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information
  2020-09-18 18:41     ` Lance Richardson
@ 2020-09-21 11:05       ` Ferruh Yigit
  0 siblings, 0 replies; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-21 11:05 UTC (permalink / raw)
  To: Lance Richardson; +Cc: Ajit Khaparde, Somnath Kotur, dev, stable
On 9/18/2020 7:41 PM, Lance Richardson wrote:
> On Fri, Sep 11, 2020 at 10:41 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>>> -     qinfo->conf.rx_drop_en = 0;
>>> +     qinfo->conf.rx_drop_en = 1;
>>
>> Why 0 is wrong but 1 is correct?
>>
>> Technically 'rx_drop_en' is a user configuration, which is set via
>> 'rte_eth_rx_queue_setup()' API.
>>
>> bnxt seems not honoring this config option at all.
>>
>> Based on HW capability, I think two things can be done,
>> 1) Configure the HW based on config request, and return configured value in
>> 'bnxt_rxq_info_get_op()'. see 'ixgbe'.
>>
>> 2) If HW is not configurable, check the value in 'rte_eth_rx_queue_setup()'
>> a) return error if unsupported value requested. see 'sfc'.
>> b) log a warning and overwrite the requested config with whatever supported.
>> And for both a & b, return current config in the 'bnxt_rxq_info_get_op()'
>>
>>>        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
>>> +     qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
>>
> Hi Ferruh,
> 
> Apologies, this somehow didn't make it to my inbox.
> 
> I believe case (2) applies here, rx_drop_en is not currently configurable in hw,
> so this change was intended to accurately report the effective value. I'm not
> sure whether (2a) or (2b) would be better, but (2b) seems less likely to cause
> issues for existing applications.
 >
I guess (2a) may cause more issues with existing applications, 
applications previously running without problem may now start failing if 
PMD starts returning error.
In (2b) execution will be same, and if PMD already doesn't support NOT 
dropping, functionality will be same, only configuration will reflect 
what is actually happening instead of what user thinks happening.
> 
>>
>> This is for queue specific offloads, you are returning port offloads.
>> As far as I can see bnxt doesn't have any queue specific offload, so this can be
>> dropped.
> 
> It wasn't clear to me whether this was intended to report the difference between
> the offload configuration for the queue and the offload configuration
> for the port
> or the effective offload configuration for the queue. I noticed that
> several other
> PMDs (e.g. mlx5, netvsc, sfc) report the offload configuration for the port in
> rx/tx_queue_info_get(). The sfc PMD reports the offload configuration for the
> port combined with queue-specific offloads, based on those examples this
> seemed to be correct. I guess you're saying those are also incorrect?
> 
May bad, they look OK, "the effective offload configuration for the 
queue" makes more sense here.
I guess I confused same fields should hold the queue specific offloads 
when used for 'rte_eth_rx_queue_setup()'
^ permalink raw reply	[flat|nested] 22+ messages in thread 
 
 
 
- * [dpdk-dev] [PATCH 03/12] net/bnxt: use appropriate type for Rx mbuf ring
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 01/12] net/bnxt: fix burst mode get for Arm Lance Richardson
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 02/12] net/bnxt: fix rxq/txq get information Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode Lance Richardson
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Change the type of the software receive mbuf ring from an array
of structures containing an mbuf pointer to an array of pointers
to struct rte_mbuf for consistency with how this ring is currently
used by the vector mode receive function.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_ethdev.c        |  6 ++--
 drivers/net/bnxt/bnxt_reps.c          | 21 +++++++-------
 drivers/net/bnxt/bnxt_ring.c          |  4 +--
 drivers/net/bnxt/bnxt_rxq.c           | 14 ++++-----
 drivers/net/bnxt/bnxt_rxr.c           | 41 ++++++++++++++-------------
 drivers/net/bnxt/bnxt_rxr.h           |  8 ++----
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 10 +++----
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  | 10 +++----
 8 files changed, 55 insertions(+), 59 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 5585f872d0..c57c5cc2af 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2843,7 +2843,7 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
 	struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue;
 	struct bnxt_rx_ring_info *rxr;
 	struct bnxt_cp_ring_info *cpr;
-	struct bnxt_sw_rx_bd *rx_buf;
+	struct rte_mbuf *rx_buf;
 	struct rx_pkt_cmpl *rxcmp;
 	uint32_t cons, cp_cons;
 	int rc;
@@ -2872,8 +2872,8 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
 		if (CMPL_VALID(rxcmp, !cpr->valid))
 			return RTE_ETH_RX_DESC_DONE;
 	}
-	rx_buf = &rxr->rx_buf_ring[cons];
-	if (rx_buf->mbuf == NULL)
+	rx_buf = rxr->rx_buf_ring[cons];
+	if (rx_buf == NULL)
 		return RTE_ETH_RX_DESC_UNAVAIL;
 
 
diff --git a/drivers/net/bnxt/bnxt_reps.c b/drivers/net/bnxt/bnxt_reps.c
index a1b2c4bf97..bea9f3d1c1 100644
--- a/drivers/net/bnxt/bnxt_reps.c
+++ b/drivers/net/bnxt/bnxt_reps.c
@@ -35,7 +35,7 @@ static const struct eth_dev_ops bnxt_vf_rep_dev_ops = {
 uint16_t
 bnxt_vfr_recv(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *mbuf)
 {
-	struct bnxt_sw_rx_bd *prod_rx_buf;
+	struct rte_mbuf **prod_rx_buf;
 	struct bnxt_rx_ring_info *rep_rxr;
 	struct bnxt_rx_queue *rep_rxq;
 	struct rte_eth_dev *vfr_eth_dev;
@@ -54,10 +54,9 @@ bnxt_vfr_recv(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *mbuf)
 	mask = rep_rxr->rx_ring_struct->ring_mask;
 
 	/* Put this mbuf on the RxQ of the Representor */
-	prod_rx_buf =
-		&rep_rxr->rx_buf_ring[rep_rxr->rx_prod++ & mask];
-	if (!prod_rx_buf->mbuf) {
-		prod_rx_buf->mbuf = mbuf;
+	prod_rx_buf = &rep_rxr->rx_buf_ring[rep_rxr->rx_prod++ & mask];
+	if (!*prod_rx_buf) {
+		*prod_rx_buf = mbuf;
 		vfr_bp->rx_bytes[que] += mbuf->pkt_len;
 		vfr_bp->rx_pkts[que]++;
 	} else {
@@ -75,7 +74,7 @@ bnxt_vf_rep_rx_burst(void *rx_queue,
 		     uint16_t nb_pkts)
 {
 	struct bnxt_rx_queue *rxq = rx_queue;
-	struct bnxt_sw_rx_bd *cons_rx_buf;
+	struct rte_mbuf **cons_rx_buf;
 	struct bnxt_rx_ring_info *rxr;
 	uint16_t nb_rx_pkts = 0;
 	uint16_t mask, i;
@@ -87,11 +86,11 @@ bnxt_vf_rep_rx_burst(void *rx_queue,
 	mask = rxr->rx_ring_struct->ring_mask;
 	for (i = 0; i < nb_pkts; i++) {
 		cons_rx_buf = &rxr->rx_buf_ring[rxr->rx_cons & mask];
-		if (!cons_rx_buf->mbuf)
+		if (*cons_rx_buf == NULL)
 			return nb_rx_pkts;
-		rx_pkts[nb_rx_pkts] = cons_rx_buf->mbuf;
+		rx_pkts[nb_rx_pkts] = *cons_rx_buf;
 		rx_pkts[nb_rx_pkts]->port = rxq->port_id;
-		cons_rx_buf->mbuf = NULL;
+		*cons_rx_buf = NULL;
 		nb_rx_pkts++;
 		rxr->rx_cons++;
 	}
@@ -559,7 +558,7 @@ int bnxt_vf_rep_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	struct bnxt *parent_bp = rep_bp->parent_dev->data->dev_private;
 	struct bnxt_rx_queue *parent_rxq;
 	struct bnxt_rx_queue *rxq;
-	struct bnxt_sw_rx_bd *buf_ring;
+	struct rte_mbuf **buf_ring;
 	int rc = 0;
 
 	if (queue_idx >= BNXT_MAX_VF_REP_RINGS) {
@@ -611,7 +610,7 @@ int bnxt_vf_rep_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 		goto out;
 
 	buf_ring = rte_zmalloc_socket("bnxt_rx_vfr_buf_ring",
-				      sizeof(struct bnxt_sw_rx_bd) *
+				      sizeof(struct rte_mbuf *) *
 				      rxq->rx_ring->rx_ring_struct->ring_size,
 				      RTE_CACHE_LINE_SIZE, socket_id);
 	if (!buf_ring) {
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 8f2296b293..f7f6ee8049 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -251,7 +251,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			rx_ring->vmem =
 			    (void **)((char *)mz->addr + rx_vmem_start);
 			rx_ring_info->rx_buf_ring =
-			    (struct bnxt_sw_rx_bd *)rx_ring->vmem;
+			    (struct rte_mbuf **)rx_ring->vmem;
 		}
 
 		rx_ring = rx_ring_info->ag_ring_struct;
@@ -269,7 +269,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			rx_ring->vmem =
 			    (void **)((char *)mz->addr + ag_vmem_start);
 			rx_ring_info->ag_buf_ring =
-			    (struct bnxt_sw_rx_bd *)rx_ring->vmem;
+			    (struct rte_mbuf **)rx_ring->vmem;
 		}
 
 		rx_ring_info->ag_bitmap =
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index e42308a97f..db9aa1f3ed 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -197,7 +197,7 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
 
 void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
 {
-	struct bnxt_sw_rx_bd *sw_ring;
+	struct rte_mbuf **sw_ring;
 	struct bnxt_tpa_info *tpa_info;
 	uint16_t i;
 
@@ -210,9 +210,9 @@ void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
 	if (sw_ring) {
 		for (i = 0;
 		     i < rxq->rx_ring->rx_ring_struct->ring_size; i++) {
-			if (sw_ring[i].mbuf) {
-				rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-				sw_ring[i].mbuf = NULL;
+			if (sw_ring[i]) {
+				rte_pktmbuf_free_seg(sw_ring[i]);
+				sw_ring[i] = NULL;
 			}
 		}
 	}
@@ -221,9 +221,9 @@ void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
 	if (sw_ring) {
 		for (i = 0;
 		     i < rxq->rx_ring->ag_ring_struct->ring_size; i++) {
-			if (sw_ring[i].mbuf) {
-				rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-				sw_ring[i].mbuf = NULL;
+			if (sw_ring[i]) {
+				rte_pktmbuf_free_seg(sw_ring[i]);
+				sw_ring[i] = NULL;
 			}
 		}
 	}
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index b086898148..92102e3d57 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -42,7 +42,7 @@ static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue *rxq,
 				     uint16_t prod)
 {
 	struct rx_prod_pkt_bd *rxbd = &rxr->rx_desc_ring[prod];
-	struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod];
+	struct rte_mbuf **rx_buf = &rxr->rx_buf_ring[prod];
 	struct rte_mbuf *mbuf;
 
 	mbuf = __bnxt_alloc_rx_data(rxq->mb_pool);
@@ -51,7 +51,7 @@ static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue *rxq,
 		return -ENOMEM;
 	}
 
-	rx_buf->mbuf = mbuf;
+	*rx_buf = mbuf;
 	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 
 	rxbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
@@ -64,7 +64,7 @@ static inline int bnxt_alloc_ag_data(struct bnxt_rx_queue *rxq,
 				     uint16_t prod)
 {
 	struct rx_prod_pkt_bd *rxbd = &rxr->ag_desc_ring[prod];
-	struct bnxt_sw_rx_bd *rx_buf = &rxr->ag_buf_ring[prod];
+	struct rte_mbuf **rx_buf = &rxr->ag_buf_ring[prod];
 	struct rte_mbuf *mbuf;
 
 	if (rxbd == NULL) {
@@ -83,7 +83,7 @@ static inline int bnxt_alloc_ag_data(struct bnxt_rx_queue *rxq,
 		return -ENOMEM;
 	}
 
-	rx_buf->mbuf = mbuf;
+	*rx_buf = mbuf;
 	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 
 	rxbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
@@ -95,15 +95,15 @@ static inline void bnxt_reuse_rx_mbuf(struct bnxt_rx_ring_info *rxr,
 			       struct rte_mbuf *mbuf)
 {
 	uint16_t prod = RING_NEXT(rxr->rx_ring_struct, rxr->rx_prod);
-	struct bnxt_sw_rx_bd *prod_rx_buf;
+	struct rte_mbuf **prod_rx_buf;
 	struct rx_prod_pkt_bd *prod_bd;
 
 	prod_rx_buf = &rxr->rx_buf_ring[prod];
 
-	RTE_ASSERT(prod_rx_buf->mbuf == NULL);
+	RTE_ASSERT(*prod_rx_buf == NULL);
 	RTE_ASSERT(mbuf != NULL);
 
-	prod_rx_buf->mbuf = mbuf;
+	*prod_rx_buf = mbuf;
 
 	prod_bd = &rxr->rx_desc_ring[prod];
 
@@ -116,13 +116,14 @@ static inline
 struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr,
 				     uint16_t cons)
 {
-	struct bnxt_sw_rx_bd *cons_rx_buf;
+	struct rte_mbuf **cons_rx_buf;
 	struct rte_mbuf *mbuf;
 
 	cons_rx_buf = &rxr->rx_buf_ring[cons];
-	RTE_ASSERT(cons_rx_buf->mbuf != NULL);
-	mbuf = cons_rx_buf->mbuf;
-	cons_rx_buf->mbuf = NULL;
+	RTE_ASSERT(*cons_rx_buf != NULL);
+	mbuf = *cons_rx_buf;
+	*cons_rx_buf = NULL;
+
 	return mbuf;
 }
 
@@ -226,7 +227,7 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
 	bool is_thor_tpa = tpa_info && BNXT_CHIP_THOR(rxq->bp);
 
 	for (i = 0; i < agg_buf; i++) {
-		struct bnxt_sw_rx_bd *ag_buf;
+		struct rte_mbuf **ag_buf;
 		struct rte_mbuf *ag_mbuf;
 
 		if (is_thor_tpa) {
@@ -245,7 +246,7 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
 		ag_cons = rxcmp->opaque;
 		RTE_ASSERT(ag_cons <= rxr->ag_ring_struct->ring_mask);
 		ag_buf = &rxr->ag_buf_ring[ag_cons];
-		ag_mbuf = ag_buf->mbuf;
+		ag_mbuf = *ag_buf;
 		RTE_ASSERT(ag_mbuf != NULL);
 
 		ag_mbuf->data_len = rte_le_to_cpu_16(rxcmp->len);
@@ -256,7 +257,7 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
 		last->next = ag_mbuf;
 		last = ag_mbuf;
 
-		ag_buf->mbuf = NULL;
+		*ag_buf = NULL;
 
 		/*
 		 * As aggregation buffer consumed out of order in TPA module,
@@ -866,10 +867,10 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		for (; cnt;
 			i = RING_NEXT(rxr->rx_ring_struct, i), cnt--) {
-			struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+			struct rte_mbuf **rx_buf = &rxr->rx_buf_ring[i];
 
 			/* Buffer already allocated for this index. */
-			if (rx_buf->mbuf != NULL)
+			if (*rx_buf != NULL)
 				continue;
 
 			/* This slot is empty. Alloc buffer for Rx */
@@ -960,7 +961,7 @@ int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id)
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)rxr->rx_desc_ring;
 	ring->bd_dma = rxr->rx_desc_mapping;
-	ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_rx_bd);
+	ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
 	ring->vmem = (void **)&rxr->rx_buf_ring;
 	ring->fw_ring_id = INVALID_HW_RING_ID;
 
@@ -998,7 +999,7 @@ int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id)
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)rxr->ag_desc_ring;
 	ring->bd_dma = rxr->ag_desc_mapping;
-	ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_rx_bd);
+	ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
 	ring->vmem = (void **)&rxr->ag_buf_ring;
 	ring->fw_ring_id = INVALID_HW_RING_ID;
 
@@ -1039,7 +1040,7 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 
 	prod = rxr->rx_prod;
 	for (i = 0; i < ring->ring_size; i++) {
-		if (unlikely(!rxr->rx_buf_ring[i].mbuf)) {
+		if (unlikely(!rxr->rx_buf_ring[i])) {
 			if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) {
 				PMD_DRV_LOG(WARNING,
 					    "init'ed rx ring %d with %d/%d mbufs only\n",
@@ -1057,7 +1058,7 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 	prod = rxr->ag_prod;
 
 	for (i = 0; i < ring->ring_size; i++) {
-		if (unlikely(!rxr->ag_buf_ring[i].mbuf)) {
+		if (unlikely(!rxr->ag_buf_ring[i])) {
 			if (bnxt_alloc_ag_data(rxq, rxr, prod) != 0) {
 				PMD_DRV_LOG(WARNING,
 					    "init'ed AG ring %d with %d/%d mbufs only\n",
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 2bf46cd910..5b9b5f3108 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -181,10 +181,6 @@ struct bnxt_tpa_info {
 	struct rx_tpa_v2_abuf_cmpl	agg_arr[TPA_MAX_NUM_SEGS];
 };
 
-struct bnxt_sw_rx_bd {
-	struct rte_mbuf		*mbuf; /* data associated with RX descriptor */
-};
-
 struct bnxt_rx_ring_info {
 	uint16_t		rx_prod;
 	uint16_t		ag_prod;
@@ -194,8 +190,8 @@ struct bnxt_rx_ring_info {
 
 	struct rx_prod_pkt_bd	*rx_desc_ring;
 	struct rx_prod_pkt_bd	*ag_desc_ring;
-	struct bnxt_sw_rx_bd	*rx_buf_ring; /* sw ring */
-	struct bnxt_sw_rx_bd	*ag_buf_ring; /* sw ring */
+	struct rte_mbuf		**rx_buf_ring; /* sw ring */
+	struct rte_mbuf		**ag_buf_ring; /* sw ring */
 
 	rte_iova_t		rx_desc_mapping;
 	rte_iova_t		ag_desc_mapping;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index bf76c2ac26..eff196f3a0 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -29,7 +29,7 @@ static inline void
 bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 {
 	struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
-	struct bnxt_sw_rx_bd *rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
+	struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	int i;
 
@@ -51,8 +51,8 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 		uint64x2_t buf_addr0, buf_addr1;
 		uint64x2_t rxbd0, rxbd1;
 
-		mb0 = rx_bufs[0].mbuf;
-		mb1 = rx_bufs[1].mbuf;
+		mb0 = rx_bufs[0];
+		mb1 = rx_bufs[1];
 
 		/* Load address fields from both mbufs */
 		buf_addr0 = vld1q_u64((uint64_t *)&mb0->buf_addr);
@@ -260,9 +260,9 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			raw_cons = tmp_raw_cons;
 			cons = rxcmp->opaque;
 
-			mbuf = rxr->rx_buf_ring[cons].mbuf;
+			mbuf = rxr->rx_buf_ring[cons];
 			rte_prefetch0(mbuf);
-			rxr->rx_buf_ring[cons].mbuf = NULL;
+			rxr->rx_buf_ring[cons] = NULL;
 
 			/* Set constant fields from mbuf initializer. */
 			vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 98220bc1b3..822e43343f 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -33,7 +33,7 @@ static inline void
 bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 {
 	struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
-	struct bnxt_sw_rx_bd *rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
+	struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	int i;
 
@@ -55,8 +55,8 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 		__m128i buf_addr0, buf_addr1;
 		__m128i rxbd0, rxbd1;
 
-		mb0 = rx_bufs[0].mbuf;
-		mb1 = rx_bufs[1].mbuf;
+		mb0 = rx_bufs[0];
+		mb1 = rx_bufs[1];
 
 		/* Load address fields from both mbufs */
 		buf_addr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
@@ -265,9 +265,9 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			raw_cons = tmp_raw_cons;
 			cons = rxcmp->opaque;
 
-			mbuf = rxr->rx_buf_ring[cons].mbuf;
+			mbuf = rxr->rx_buf_ring[cons];
 			rte_prefetch0(mbuf);
-			rxr->rx_buf_ring[cons].mbuf = NULL;
+			rxr->rx_buf_ring[cons] = NULL;
 
 			/* Set constant fields from mbuf initializer. */
 			_mm_store_si128((__m128i *)&mbuf->rearm_data,
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (2 preceding siblings ...)
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 03/12] net/bnxt: use appropriate type for Rx mbuf ring Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-11 15:02   ` Ferruh Yigit
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes Lance Richardson
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Disable support for vector mode when async completions can be placed
in a receive completion ring and change the default for all platforms
to use a dedicated async completion ring.
Simplify completion handling in vector mode receive paths now that
it no longer needs to handle async completions.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt.h               |  19 ++--
 drivers/net/bnxt/bnxt_ethdev.c        |   2 +-
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 121 +++++++++++---------------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  | 116 +++++++++++-------------
 4 files changed, 111 insertions(+), 147 deletions(-)
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a190d78bdd..ef5824cf9a 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -119,20 +119,19 @@
 	(BNXT_CHIP_THOR(bp) ? TPA_MAX_SEGS_TH : \
 			      TPA_MAX_SEGS)
 
-#ifdef RTE_ARCH_ARM64
-#define BNXT_NUM_ASYNC_CPR(bp) (BNXT_STINGRAY(bp) ? 0 : 1)
+/*
+ * Define the number of async completion rings to be used. Set to zero for
+ * configurations in which the maximum number of packet completion rings
+ * for packet completions is desired or when async completion handling
+ * cannot be interrupt-driven.
+ */
+#ifdef RTE_EXEC_ENV_FREEBSD
+/* In FreeBSD OS, nic_uio driver does not support interrupts */
+#define BNXT_NUM_ASYNC_CPR(bp) 0
 #else
 #define BNXT_NUM_ASYNC_CPR(bp) 1
 #endif
 
-/* In FreeBSD OS, nic_uio driver does not support interrupts */
-#ifdef RTE_EXEC_ENV_FREEBSD
-#ifdef BNXT_NUM_ASYNC_CPR
-#undef BNXT_NUM_ASYNC_CPR
-#endif
-#define BNXT_NUM_ASYNC_CPR(bp)	0
-#endif
-
 #define BNXT_MISC_VEC_ID               RTE_INTR_VEC_ZERO_OFFSET
 #define BNXT_RX_VEC_START              RTE_INTR_VEC_RXTX_OFFSET
 
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index c57c5cc2af..1ad9bfc0a6 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1114,7 +1114,7 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp)) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index eff196f3a0..a212d46cbe 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -206,7 +206,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint32_t cons;
 	int nb_rx_pkts = 0;
 	struct rx_pkt_cmpl *rxcmp;
-	bool evt = false;
 	const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
 	const uint8x16_t shuf_msk = {
 		0xFF, 0xFF, 0xFF, 0xFF,    /* pkt_type (zeroes) */
@@ -215,6 +214,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		0xFF, 0xFF,                /* vlan_tci (zeroes) */
 		12, 13, 14, 15             /* rss hash */
 	};
+	int i;
 
 	/* If Rx Q was stopped return */
 	if (unlikely(!rxq->rx_started))
@@ -226,90 +226,73 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
 	nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST);
 
-	/* Make nb_pkts an integer multiple of RTE_BNXT_DESCS_PER_LOOP */
+	/* Make nb_pkts an integer multiple of RTE_BNXT_DESCS_PER_LOOP. */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_BNXT_DESCS_PER_LOOP);
 	if (!nb_pkts)
 		return 0;
 
 	/* Handle RX burst request */
-	while (1) {
+	for (i = 0; i < nb_pkts; i++) {
+		struct rx_pkt_cmpl_hi *rxcmp1;
+		struct rte_mbuf *mbuf;
+		uint64x2_t mm_rxcmp;
+		uint8x16_t pkt_mb;
+
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
 		rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
+		rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cons + 1];
 
-		if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
-		if (likely(CMP_TYPE(rxcmp) == RX_PKT_CMPL_TYPE_RX_L2)) {
-			struct rx_pkt_cmpl_hi *rxcmp1;
-			uint32_t tmp_raw_cons;
-			uint16_t cp_cons;
-			struct rte_mbuf *mbuf;
-			uint64x2_t mm_rxcmp;
-			uint8x16_t pkt_mb;
-
-			tmp_raw_cons = NEXT_RAW_CMP(raw_cons);
-			cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
-			rxcmp1 = (struct rx_pkt_cmpl_hi *)
-						&cpr->cp_desc_ring[cp_cons];
-
-			if (!CMP_VALID(rxcmp1, tmp_raw_cons,
-				       cpr->cp_ring_struct))
-				break;
-
-			raw_cons = tmp_raw_cons;
-			cons = rxcmp->opaque;
-
-			mbuf = rxr->rx_buf_ring[cons];
-			rte_prefetch0(mbuf);
-			rxr->rx_buf_ring[cons] = NULL;
-
-			/* Set constant fields from mbuf initializer. */
-			vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
-
-			/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-			mm_rxcmp = vld1q_u64((uint64_t *)rxcmp);
-			pkt_mb = vqtbl1q_u8(vreinterpretq_u8_u64(mm_rxcmp),
-					    shuf_msk);
-			vst1q_u64((uint64_t *)&mbuf->rx_descriptor_fields1,
-				  vreinterpretq_u64_u8(pkt_mb));
-
-			rte_compiler_barrier();
-
-			if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
-				mbuf->ol_flags |= PKT_RX_RSS_HASH;
-
-			if (rxcmp1->flags2 &
-			    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
-				mbuf->vlan_tci = rxcmp1->metadata &
-					(RX_PKT_CMPL_METADATA_VID_MASK |
-					RX_PKT_CMPL_METADATA_DE |
-					RX_PKT_CMPL_METADATA_PRI_MASK);
-				mbuf->ol_flags |=
-					PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-			}
-
-			bnxt_parse_csum(mbuf, rxcmp1);
-			mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
-			rx_pkts[nb_rx_pkts++] = mbuf;
-		} else if (!BNXT_NUM_ASYNC_CPR(rxq->bp)) {
-			evt =
-			bnxt_event_hwrm_resp_handler(rxq->bp,
-						     (struct cmpl_base *)rxcmp);
+		raw_cons += 2;
+		cons = rxcmp->opaque;
+
+		mbuf = rxr->rx_buf_ring[cons];
+		rte_prefetch0(mbuf);
+		rxr->rx_buf_ring[cons] = NULL;
+
+		/* Set constant fields from mbuf initializer. */
+		vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
+
+		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
+		mm_rxcmp = vld1q_u64((uint64_t *)rxcmp);
+		pkt_mb = vqtbl1q_u8(vreinterpretq_u8_u64(mm_rxcmp), shuf_msk);
+		vst1q_u64((uint64_t *)&mbuf->rx_descriptor_fields1,
+			  vreinterpretq_u64_u8(pkt_mb));
+
+		rte_compiler_barrier();
+
+		if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
+			mbuf->ol_flags |= PKT_RX_RSS_HASH;
+
+		if (rxcmp1->flags2 &
+		    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
+			mbuf->vlan_tci = rxcmp1->metadata &
+				(RX_PKT_CMPL_METADATA_VID_MASK |
+				RX_PKT_CMPL_METADATA_DE |
+				RX_PKT_CMPL_METADATA_PRI_MASK);
+			mbuf->ol_flags |=
+				PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
 		}
 
-		raw_cons = NEXT_RAW_CMP(raw_cons);
-		if (nb_rx_pkts == nb_pkts || evt)
-			break;
+		bnxt_parse_csum(mbuf, rxcmp1);
+		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
+
+		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
-	rxr->rx_prod = RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);
 
-	rxq->rxrearm_nb += nb_rx_pkts;
-	cpr->cp_raw_cons = raw_cons;
-	cpr->valid = !!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
-	if (nb_rx_pkts || evt)
+	if (nb_rx_pkts) {
+		rxr->rx_prod =
+			RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);
+
+		rxq->rxrearm_nb += nb_rx_pkts;
+		cpr->cp_raw_cons = raw_cons;
+		cpr->valid =
+			!!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
 		bnxt_db_cq(cpr);
+	}
 
 	return nb_rx_pkts;
 }
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 822e43343f..c00d7f6807 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -210,7 +210,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint32_t cons;
 	int nb_rx_pkts = 0;
 	struct rx_pkt_cmpl *rxcmp;
-	bool evt = false;
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
 	const __m128i shuf_msk =
 		_mm_set_epi8(15, 14, 13, 12,          /* rss */
@@ -218,6 +217,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     3, 2,                    /* data_len */
 			     0xFF, 0xFF, 3, 2,        /* pkt_len */
 			     0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
+	int i;
 
 	/* If Rx Q was stopped return */
 	if (unlikely(!rxq->rx_started))
@@ -238,83 +238,65 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		return 0;
 
 	/* Handle RX burst request */
-	while (1) {
+	for (i = 0; i < nb_pkts; i++) {
+		struct rx_pkt_cmpl_hi *rxcmp1;
+		struct rte_mbuf *mbuf;
+		__m128i mm_rxcmp, pkt_mb;
+
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
 		rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
+		rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cons + 1];
 
-		if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
-		if (likely(CMP_TYPE(rxcmp) == RX_PKT_CMPL_TYPE_RX_L2)) {
-			struct rx_pkt_cmpl_hi *rxcmp1;
-			uint32_t tmp_raw_cons;
-			uint16_t cp_cons;
-			struct rte_mbuf *mbuf;
-			__m128i mm_rxcmp, pkt_mb;
-
-			tmp_raw_cons = NEXT_RAW_CMP(raw_cons);
-			cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
-			rxcmp1 = (struct rx_pkt_cmpl_hi *)
-						&cpr->cp_desc_ring[cp_cons];
-
-			if (!CMP_VALID(rxcmp1, tmp_raw_cons,
-				       cpr->cp_ring_struct))
-				break;
-
-			raw_cons = tmp_raw_cons;
-			cons = rxcmp->opaque;
-
-			mbuf = rxr->rx_buf_ring[cons];
-			rte_prefetch0(mbuf);
-			rxr->rx_buf_ring[cons] = NULL;
-
-			/* Set constant fields from mbuf initializer. */
-			_mm_store_si128((__m128i *)&mbuf->rearm_data,
-					mbuf_init);
-
-			/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-			mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
-			pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
-			_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1,
-					 pkt_mb);
-
-			rte_compiler_barrier();
-
-			if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
-				mbuf->ol_flags |= PKT_RX_RSS_HASH;
-
-			if (rxcmp1->flags2 &
-			    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
-				mbuf->vlan_tci = rxcmp1->metadata &
-					(RX_PKT_CMPL_METADATA_VID_MASK |
-					RX_PKT_CMPL_METADATA_DE |
-					RX_PKT_CMPL_METADATA_PRI_MASK);
-				mbuf->ol_flags |=
-					PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-			}
-
-			bnxt_parse_csum(mbuf, rxcmp1);
-			mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
-			rx_pkts[nb_rx_pkts++] = mbuf;
-		} else if (!BNXT_NUM_ASYNC_CPR(rxq->bp)) {
-			evt =
-			bnxt_event_hwrm_resp_handler(rxq->bp,
-						     (struct cmpl_base *)rxcmp);
+		raw_cons += 2;
+		cons = rxcmp->opaque;
+
+		mbuf = rxr->rx_buf_ring[cons];
+		rte_prefetch0(mbuf);
+		rxr->rx_buf_ring[cons] = NULL;
+
+		/* Set constant fields from mbuf initializer. */
+		_mm_store_si128((__m128i *)&mbuf->rearm_data, mbuf_init);
+
+		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
+		mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
+		pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
+		_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);
+
+		rte_compiler_barrier();
+
+		if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
+			mbuf->ol_flags |= PKT_RX_RSS_HASH;
+
+		if (rxcmp1->flags2 &
+		    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
+			mbuf->vlan_tci = rxcmp1->metadata &
+				(RX_PKT_CMPL_METADATA_VID_MASK |
+				RX_PKT_CMPL_METADATA_DE |
+				RX_PKT_CMPL_METADATA_PRI_MASK);
+			mbuf->ol_flags |=
+				PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
 		}
 
-		raw_cons = NEXT_RAW_CMP(raw_cons);
-		if (nb_rx_pkts == nb_pkts || evt)
-			break;
+		bnxt_parse_csum(mbuf, rxcmp1);
+		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
+
+		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
-	rxr->rx_prod = RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);
 
-	rxq->rxrearm_nb += nb_rx_pkts;
-	cpr->cp_raw_cons = raw_cons;
-	cpr->valid = !!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
-	if (nb_rx_pkts || evt)
+	if (nb_rx_pkts) {
+		rxr->rx_prod =
+			RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);
+
+		rxq->rxrearm_nb += nb_rx_pkts;
+		cpr->cp_raw_cons = raw_cons;
+		cpr->valid =
+			!!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
 		bnxt_db_cq(cpr);
+	}
 
 	return nb_rx_pkts;
 }
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode Lance Richardson
@ 2020-09-11 15:02   ` Ferruh Yigit
  2020-09-11 15:07     ` Lance Richardson
  0 siblings, 1 reply; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-11 15:02 UTC (permalink / raw)
  To: Lance Richardson, Ajit Khaparde, Somnath Kotur; +Cc: dev
On 9/9/2020 4:52 PM, Lance Richardson wrote:
> Disable support for vector mode when async completions can be placed
> in a receive completion ring and change the default for all platforms
> to use a dedicated async completion ring.
> 
> Simplify completion handling in vector mode receive paths now that
> it no longer needs to handle async completions.
I guess 'cq' is "completion queue"(?), based on above description I understand
vector mode no more handles async completions and to enable vector mode there
should be at lest one async completion ring. If correct, would it be OK if I
update the title as:
net/bnxt: require async completion ring for vector mode
> 
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
<...>
^ permalink raw reply	[flat|nested] 22+ messages in thread 
- * Re: [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode
  2020-09-11 15:02   ` Ferruh Yigit
@ 2020-09-11 15:07     ` Lance Richardson
  0 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-11 15:07 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ajit Khaparde, Somnath Kotur, dev
On Fri, Sep 11, 2020 at 11:03 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 9/9/2020 4:52 PM, Lance Richardson wrote:
> > Disable support for vector mode when async completions can be placed
> > in a receive completion ring and change the default for all platforms
> > to use a dedicated async completion ring.
> >
> > Simplify completion handling in vector mode receive paths now that
> > it no longer needs to handle async completions.
>
> I guess 'cq' is "completion queue"(?), based on above description I understand
> vector mode no more handles async completions and to enable vector mode there
> should be at lest one async completion ring. If correct, would it be OK if I
> update the title as:
> net/bnxt: require async completion ring for vector mode
That's correct. Your suggestion will make things clearer, sounds good to me.
Thanks,
    Lance
>
> >
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
>
> <...>
>
^ permalink raw reply	[flat|nested] 22+ messages in thread
 
 
- * [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (3 preceding siblings ...)
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 04/12] net/bnxt: require async cq for vector mode Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-14 22:03   ` Ferruh Yigit
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 06/12] net/bnxt: use smaller cq when agg ring not needed Lance Richardson
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Improve support for small ring sizes:
   - Ensure that transmit free threshold is no more than 1/4 ring size.
   - Ensure that receive free threshold is no more than 1/4 ring size.
   - Validate requested ring sizes against minimum supported size.
   - Use rxq receive free threshold instead of fixed maximum burst
     size to trigger bulk receive buffer allocation.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxq.c             |  6 +++--
 drivers/net/bnxt/bnxt_rxtx_vec_common.h | 10 +++++----
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c   | 29 +++++++++++++------------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c    | 29 +++++++++++++------------
 drivers/net/bnxt/bnxt_txq.c             |  7 ++++--
 5 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index db9aa1f3ed..4ef3b5cb5c 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -14,6 +14,7 @@
 #include "bnxt_rxq.h"
 #include "bnxt_rxr.h"
 #include "bnxt_vnic.h"
+#include "bnxt_rxtx_vec_common.h"
 #include "hsi_struct_def_dpdk.h"
 
 /*
@@ -305,7 +306,7 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 		return -EINVAL;
 	}
 
-	if (!nb_desc || nb_desc > MAX_RX_DESC_CNT) {
+	if (nb_desc < BNXT_MIN_RING_DESC || nb_desc > MAX_RX_DESC_CNT) {
 		PMD_DRV_LOG(ERR, "nb_desc %d is invalid\n", nb_desc);
 		rc = -EINVAL;
 		goto out;
@@ -326,7 +327,8 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	rxq->bp = bp;
 	rxq->mb_pool = mp;
 	rxq->nb_rx_desc = nb_desc;
-	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+	rxq->rx_free_thresh =
+		RTE_MIN(rte_align32pow2(nb_desc) / 4, RTE_BNXT_MAX_RX_BURST);
 
 	PMD_DRV_LOG(DEBUG, "RX Buf MTU %d\n", eth_dev->data->mtu);
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index 3da3c48f4e..2f28759d06 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -5,11 +5,13 @@
 
 #ifndef _BNXT_RXTX_VEC_COMMON_H_
 #define _BNXT_RXTX_VEC_COMMON_H_
+#include "hsi_struct_def_dpdk.h"
+#include "bnxt_rxq.h"
+#include "bnxt_rxr.h"
 
-#define RTE_BNXT_MAX_RX_BURST		32
-#define RTE_BNXT_MAX_TX_BURST		32
-#define RTE_BNXT_RXQ_REARM_THRESH	32
-#define RTE_BNXT_DESCS_PER_LOOP		4
+#define RTE_BNXT_MAX_RX_BURST		32U
+#define RTE_BNXT_MAX_TX_BURST		32U
+#define RTE_BNXT_DESCS_PER_LOOP		4U
 
 #define TX_BD_FLAGS_CMPL ((1 << TX_BD_LONG_FLAGS_BD_CNT_SFT) | \
 			  TX_BD_SHORT_FLAGS_COAL_NOW | \
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index a212d46cbe..7f3eabcda1 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -13,9 +13,6 @@
 #include "bnxt.h"
 #include "bnxt_cpr.h"
 #include "bnxt_ring.h"
-#include "bnxt_rxr.h"
-#include "bnxt_rxq.h"
-#include "hsi_struct_def_dpdk.h"
 #include "bnxt_rxtx_vec_common.h"
 
 #include "bnxt_txq.h"
@@ -31,23 +28,27 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 	struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
 	struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
-	int i;
+	int nb, i;
 
 	const uint64x2_t hdr_room = {0, RTE_PKTMBUF_HEADROOM};
 	const uint64x2_t addrmask = {0, UINT64_MAX};
 
-	/* Pull RTE_BNXT_RXQ_REARM_THRESH more mbufs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
-				 (void *)rx_bufs,
-				 RTE_BNXT_RXQ_REARM_THRESH) < 0) {
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_BNXT_RXQ_REARM_THRESH;
+	/*
+	 * Number of mbufs to allocate must be a multiple of two. The
+	 * allocation must not go past the end of the ring.
+	 */
+	nb = RTE_MIN(rxq->rxrearm_nb & ~0x1,
+		     rxq->nb_rx_desc - rxq->rxrearm_start);
+
+	/* Allocate new mbufs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rx_bufs, nb) < 0) {
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += nb;
 
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_BNXT_RXQ_REARM_THRESH; i += 2, rx_bufs += 2) {
+	for (i = 0; i < nb; i += 2, rx_bufs += 2) {
 		uint64x2_t buf_addr0, buf_addr1;
 		uint64x2_t rxbd0, rxbd1;
 
@@ -83,12 +84,12 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 		vst1q_u64((uint64_t *)(rxbds++), rxbd1);
 	}
 
-	rxq->rxrearm_start += RTE_BNXT_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += nb;
 	bnxt_db_write(&rxr->rx_db, rxq->rxrearm_start - 1);
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= RTE_BNXT_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= nb;
 }
 
 static uint32_t
@@ -220,7 +221,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	if (unlikely(!rxq->rx_started))
 		return 0;
 
-	if (rxq->rxrearm_nb >= RTE_BNXT_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
 		bnxt_rxq_rearm(rxq, rxr);
 
 	/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index c00d7f6807..eced74e4e3 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -17,9 +17,6 @@
 #include "bnxt.h"
 #include "bnxt_cpr.h"
 #include "bnxt_ring.h"
-#include "bnxt_rxr.h"
-#include "bnxt_rxq.h"
-#include "hsi_struct_def_dpdk.h"
 #include "bnxt_rxtx_vec_common.h"
 
 #include "bnxt_txq.h"
@@ -35,23 +32,27 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 	struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
 	struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
-	int i;
+	int nb, i;
 
 	const __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, 0);
 	const __m128i addrmask = _mm_set_epi64x(UINT64_MAX, 0);
 
-	/* Pull RTE_BNXT_RXQ_REARM_THRESH more mbufs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
-				 (void *)rx_bufs,
-				 RTE_BNXT_RXQ_REARM_THRESH) < 0) {
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_BNXT_RXQ_REARM_THRESH;
+	/*
+	 * Number of mbufs to allocate must be a multiple of two. The
+	 * allocation must not go past the end of the ring.
+	 */
+	nb = RTE_MIN(rxq->rxrearm_nb & ~0x1,
+		     rxq->nb_rx_desc - rxq->rxrearm_start);
+
+	/* Allocate new mbufs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rx_bufs, nb) < 0) {
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += nb;
 
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_BNXT_RXQ_REARM_THRESH; i += 2, rx_bufs += 2) {
+	for (i = 0; i < nb; i += 2, rx_bufs += 2) {
 		__m128i buf_addr0, buf_addr1;
 		__m128i rxbd0, rxbd1;
 
@@ -87,12 +88,12 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 		_mm_store_si128((__m128i *)(rxbds++), rxbd1);
 	}
 
-	rxq->rxrearm_start += RTE_BNXT_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += nb;
 	bnxt_db_write(&rxr->rx_db, rxq->rxrearm_start - 1);
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= RTE_BNXT_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= nb;
 }
 
 static uint32_t
@@ -223,7 +224,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	if (unlikely(!rxq->rx_started))
 		return 0;
 
-	if (rxq->rxrearm_nb >= RTE_BNXT_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
 		bnxt_rxq_rearm(rxq, rxr);
 
 	/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 2d7645eeb0..42930abbf5 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -11,6 +11,7 @@
 #include "bnxt_ring.h"
 #include "bnxt_txq.h"
 #include "bnxt_txr.h"
+#include "bnxt_rxtx_vec_common.h"
 
 /*
  * TX Queues
@@ -97,7 +98,7 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
 		return -EINVAL;
 	}
 
-	if (!nb_desc || nb_desc > MAX_TX_DESC_CNT) {
+	if (nb_desc < BNXT_MIN_RING_DESC || nb_desc > MAX_TX_DESC_CNT) {
 		PMD_DRV_LOG(ERR, "nb_desc %d is invalid", nb_desc);
 		rc = -EINVAL;
 		goto out;
@@ -129,7 +130,9 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	}
 	txq->bp = bp;
 	txq->nb_tx_desc = nb_desc;
-	txq->tx_free_thresh = tx_conf->tx_free_thresh;
+	txq->tx_free_thresh =
+		RTE_MIN(rte_align32pow2(nb_desc) / 4, RTE_BNXT_MAX_TX_BURST);
+
 	txq->tx_deferred_start = tx_conf->tx_deferred_start;
 
 	rc = bnxt_init_tx_ring_struct(txq, socket_id);
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes Lance Richardson
@ 2020-09-14 22:03   ` Ferruh Yigit
  2020-09-15 14:12     ` Lance Richardson
  0 siblings, 1 reply; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-14 22:03 UTC (permalink / raw)
  To: Lance Richardson, Ajit Khaparde, Somnath Kotur; +Cc: dev, rasland
On 9/9/2020 4:52 PM, Lance Richardson wrote:
> Improve support for small ring sizes:
>    - Ensure that transmit free threshold is no more than 1/4 ring size.
>    - Ensure that receive free threshold is no more than 1/4 ring size.
>    - Validate requested ring sizes against minimum supported size.
>    - Use rxq receive free threshold instead of fixed maximum burst
>      size to trigger bulk receive buffer allocation.
> 
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> ---
>  drivers/net/bnxt/bnxt_rxq.c             |  6 +++--
>  drivers/net/bnxt/bnxt_rxtx_vec_common.h | 10 +++++----
>  drivers/net/bnxt/bnxt_rxtx_vec_neon.c   | 29 +++++++++++++------------
>  drivers/net/bnxt/bnxt_rxtx_vec_sse.c    | 29 +++++++++++++------------
>  drivers/net/bnxt/bnxt_txq.c             |  7 ++++--
>  5 files changed, 45 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
> index db9aa1f3ed..4ef3b5cb5c 100644
> --- a/drivers/net/bnxt/bnxt_rxq.c
> +++ b/drivers/net/bnxt/bnxt_rxq.c
> @@ -14,6 +14,7 @@
>  #include "bnxt_rxq.h"
>  #include "bnxt_rxr.h"
>  #include "bnxt_vnic.h"
> +#include "bnxt_rxtx_vec_common.h"
>  #include "hsi_struct_def_dpdk.h"
>  
Hi Lance, Ajit, Somnath,
Raslan reported a build error for PPC architecture, this happens because
static inline functions in 'bnxt_rxtx_vec_common.h' are using
'rxq->rxrearm_nb' & 'rxq->rxrearm_start' which are not defined for PPC.
As far as I can see the 'bnxt_rxtx_vec_common.h' is included because of
some macros.
A quick fix can be to wrap all static inline functions with !PPC checks
but from the name of the header file, it looks like it shouldn't be
included by scalar datapath .c files at first place.
Instead it can be possible to extract those macros into another header
and both these .c file and 'bnxt_rxtx_vec_common.h' can include it.
Or 'bnxt_txq.h' & 'bnxt_txr.h' can be used to hold those macros, and
those headers were already included by 'bnxt_rxtx_vec_common.h' and .c
files.
Anyway can you please provide the fix as incremental patches on top of
latest head, so I can squash them into original patches.
If the patches can't be done as incremental fixes or if they will delay,
I will need to drop the patchset from next-net, to not block any
possible pull from main repo.
^ permalink raw reply	[flat|nested] 22+ messages in thread 
- * Re: [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes
  2020-09-14 22:03   ` Ferruh Yigit
@ 2020-09-15 14:12     ` Lance Richardson
  0 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-15 14:12 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ajit Khaparde, Somnath Kotur, dev, rasland
On Mon, Sep 14, 2020 at 6:03 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
<snip>
>
> Hi Lance, Ajit, Somnath,
>
> Raslan reported a build error for PPC architecture, this happens because
> static inline functions in 'bnxt_rxtx_vec_common.h' are using
> 'rxq->rxrearm_nb' & 'rxq->rxrearm_start' which are not defined for PPC.
>
> As far as I can see the 'bnxt_rxtx_vec_common.h' is included because of
> some macros.
> A quick fix can be to wrap all static inline functions with !PPC checks
> but from the name of the header file, it looks like it shouldn't be
> included by scalar datapath .c files at first place.
>
> Instead it can be possible to extract those macros into another header
> and both these .c file and 'bnxt_rxtx_vec_common.h' can include it.
> Or 'bnxt_txq.h' & 'bnxt_txr.h' can be used to hold those macros, and
> those headers were already included by 'bnxt_rxtx_vec_common.h' and .c
> files.
>
>
> Anyway can you please provide the fix as incremental patches on top of
> latest head, so I can squash them into original patches.
>
> If the patches can't be done as incremental fixes or if they will delay,
> I will need to drop the patchset from next-net, to not block any
> possible pull from main repo.
Hi Feruh,
I just sent a fix for this based on the head of next-net, compile-tested
on x86_64, arm64, and powerpc. Please let me know if you run into
any further snags.
Thanks,
    Lance
^ permalink raw reply	[flat|nested] 22+ messages in thread
 
 
- * [dpdk-dev] [PATCH 06/12] net/bnxt: use smaller cq when agg ring not needed
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (4 preceding siblings ...)
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 05/12] net/bnxt: improve support for small ring sizes Lance Richardson
@ 2020-09-09 15:52 ` Lance Richardson
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode Lance Richardson
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:52 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Don't allocate extra completion queue entries for aggregation
ring when aggregation ring will not be used.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 11 +++++------
 drivers/net/bnxt/bnxt_rxr.c    | 21 +++++++++++++++++++--
 2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 1ad9bfc0a6..27eba431b8 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1295,6 +1295,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
 	eth_dev->data->dev_started = 0;
+	eth_dev->data->scattered_rx = 0;
+
 	/* Prevent crashes when queues are still in use */
 	eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
 	eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
@@ -2695,14 +2697,12 @@ int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 	new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
 		       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	/*
-	 * If vector-mode tx/rx is active, disallow any MTU change that would
-	 * require scattered receive support.
+	 * Disallow any MTU change that would require scattered receive support
+	 * if it is not already enabled.
 	 */
 	if (eth_dev->data->dev_started &&
-	    (eth_dev->rx_pkt_burst == bnxt_recv_pkts_vec ||
-	     eth_dev->tx_pkt_burst == bnxt_xmit_pkts_vec) &&
+	    !eth_dev->data->scattered_rx &&
 	    (new_pkt_size >
 	     eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM)) {
 		PMD_DRV_LOG(ERR,
@@ -2710,7 +2710,6 @@ int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 		PMD_DRV_LOG(ERR, "Stop port before changing MTU.\n");
 		return -EINVAL;
 	}
-#endif
 
 	if (new_mtu > RTE_ETHER_MTU) {
 		bp->flags |= BNXT_FLAG_JUMBO;
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 92102e3d57..5673e2b50f 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -938,9 +938,12 @@ void bnxt_free_rx_rings(struct bnxt *bp)
 
 int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id)
 {
+	struct rte_eth_dev *eth_dev = rxq->bp->eth_dev;
+	struct rte_eth_rxmode *rxmode;
 	struct bnxt_cp_ring_info *cpr;
 	struct bnxt_rx_ring_info *rxr;
 	struct bnxt_ring *ring;
+	bool use_agg_ring;
 
 	rxq->rx_buf_size = BNXT_MAX_PKT_LEN + sizeof(struct rte_mbuf);
 
@@ -978,8 +981,22 @@ int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id)
 	if (ring == NULL)
 		return -ENOMEM;
 	cpr->cp_ring_struct = ring;
-	ring->ring_size = rte_align32pow2(rxr->rx_ring_struct->ring_size *
-					  (2 + AGG_RING_SIZE_FACTOR));
+
+	rxmode = ð_dev->data->dev_conf.rxmode;
+	use_agg_ring = (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) ||
+		       (rxmode->offloads & DEV_RX_OFFLOAD_TCP_LRO) ||
+		       (rxmode->max_rx_pkt_len >
+			 (uint32_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+				    RTE_PKTMBUF_HEADROOM));
+
+	/* Allocate two completion slots per entry in desc ring. */
+	ring->ring_size = rxr->rx_ring_struct->ring_size * 2;
+
+	/* Allocate additional slots if aggregation ring is in use. */
+	if (use_agg_ring)
+		ring->ring_size *= AGG_RING_SIZE_FACTOR;
+
+	ring->ring_size = rte_align32pow2(ring->ring_size);
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)cpr->cp_desc_ring;
 	ring->bd_dma = cpr->cp_desc_mapping;
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (5 preceding siblings ...)
  2020-09-09 15:52 ` [dpdk-dev] [PATCH 06/12] net/bnxt: use smaller cq when agg ring not needed Lance Richardson
@ 2020-09-09 15:53 ` Lance Richardson
  2020-09-11 15:19   ` Ferruh Yigit
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 08/12] net/bnxt: use table-based packet type translation Lance Richardson
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:53 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev, Kalesh Anakkur Purayil
Increase the maximum supported burst size for the bnxt vector
mode PMD from 32 to 64.
Reviewed-by: Kalesh Anakkur Purayil <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxtx_vec_common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index 2f28759d06..fc2a12272b 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -9,8 +9,8 @@
 #include "bnxt_rxq.h"
 #include "bnxt_rxr.h"
 
-#define RTE_BNXT_MAX_RX_BURST		32U
-#define RTE_BNXT_MAX_TX_BURST		32U
+#define RTE_BNXT_MAX_RX_BURST		64U
+#define RTE_BNXT_MAX_TX_BURST		64U
 #define RTE_BNXT_DESCS_PER_LOOP		4U
 
 #define TX_BD_FLAGS_CMPL ((1 << TX_BD_LONG_FLAGS_BD_CNT_SFT) | \
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode Lance Richardson
@ 2020-09-11 15:19   ` Ferruh Yigit
  2020-09-11 15:38     ` Lance Richardson
  0 siblings, 1 reply; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-11 15:19 UTC (permalink / raw)
  To: Lance Richardson, Ajit Khaparde, Somnath Kotur
  Cc: dev, Kalesh Anakkur Purayil
On 9/9/2020 4:53 PM, Lance Richardson wrote:
> Increase the maximum supported burst size for the bnxt vector
> mode PMD from 32 to 64.
What is the motivation here? Like does it improve the performance? If so in
which conditions etc.. It would be nice to describe the why & impact.
> 
> Reviewed-by: Kalesh Anakkur Purayil <kalesh-anakkur.purayil@broadcom.com>
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> ---
>  drivers/net/bnxt/bnxt_rxtx_vec_common.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> index 2f28759d06..fc2a12272b 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> @@ -9,8 +9,8 @@
>  #include "bnxt_rxq.h"
>  #include "bnxt_rxr.h"
>  
> -#define RTE_BNXT_MAX_RX_BURST		32U
> -#define RTE_BNXT_MAX_TX_BURST		32U
> +#define RTE_BNXT_MAX_RX_BURST		64U
> +#define RTE_BNXT_MAX_TX_BURST		64U
>  #define RTE_BNXT_DESCS_PER_LOOP		4U
>  
>  #define TX_BD_FLAGS_CMPL ((1 << TX_BD_LONG_FLAGS_BD_CNT_SFT) | \
> 
^ permalink raw reply	[flat|nested] 22+ messages in thread 
- * Re: [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode
  2020-09-11 15:19   ` Ferruh Yigit
@ 2020-09-11 15:38     ` Lance Richardson
  2020-09-11 15:56       ` Ferruh Yigit
  0 siblings, 1 reply; 22+ messages in thread
From: Lance Richardson @ 2020-09-11 15:38 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Ajit Khaparde, Somnath Kotur, dev, Kalesh Anakkur Purayil
On Fri, Sep 11, 2020 at 11:19 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 9/9/2020 4:53 PM, Lance Richardson wrote:
> > Increase the maximum supported burst size for the bnxt vector
> > mode PMD from 32 to 64.
>
> What is the motivation here? Like does it improve the performance? If so in
> which conditions etc.. It would be nice to describe the why & impact.
>
How about this:
    net/bnxt: increase max burst size for vector mode
    Increase the maximum supported burst size for the bnxt vector
    mode PMD from 32 to 64. With larger burst sizes, per-burst
    overhead is amortized over more packets, improving overall
    performance. For small packets this has been measured to
    provide a 4-10% increase in single-core throughput with
    testpmd iofwd.
Thanks,
    Lance
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode
  2020-09-11 15:38     ` Lance Richardson
@ 2020-09-11 15:56       ` Ferruh Yigit
  0 siblings, 0 replies; 22+ messages in thread
From: Ferruh Yigit @ 2020-09-11 15:56 UTC (permalink / raw)
  To: Lance Richardson
  Cc: Ajit Khaparde, Somnath Kotur, dev, Kalesh Anakkur Purayil
On 9/11/2020 4:38 PM, Lance Richardson wrote:
> On Fri, Sep 11, 2020 at 11:19 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 9/9/2020 4:53 PM, Lance Richardson wrote:
>>> Increase the maximum supported burst size for the bnxt vector
>>> mode PMD from 32 to 64.
>>
>> What is the motivation here? Like does it improve the performance? If so in
>> which conditions etc.. It would be nice to describe the why & impact.
>>
> 
> How about this:
> 
>     net/bnxt: increase max burst size for vector mode
> 
>     Increase the maximum supported burst size for the bnxt vector
>     mode PMD from 32 to 64. With larger burst sizes, per-burst
>     overhead is amortized over more packets, improving overall
>     performance. For small packets this has been measured to
>     provide a 4-10% increase in single-core throughput with
>     testpmd iofwd.
> 
Will update while merging, thanks.
^ permalink raw reply	[flat|nested] 22+ messages in thread 
 
 
 
- * [dpdk-dev] [PATCH 08/12] net/bnxt: use table-based packet type translation
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (6 preceding siblings ...)
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 07/12] net/bnxt: increase max burst size for vector mode Lance Richardson
@ 2020-09-09 15:53 ` Lance Richardson
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 09/12] net/bnxt: table-based handling for ol flags Lance Richardson
  2020-09-11  3:42 ` [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Ajit Khaparde
  9 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:53 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Use table-based method for translating receive packet descriptor
flags into rte_mbuf packet type values.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxr.c           | 127 ++++++++++++++++----------
 drivers/net/bnxt/bnxt_rxr.h           |   2 +
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c |  88 ++++++------------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  |  81 +++++-----------
 4 files changed, 134 insertions(+), 164 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 5673e2b50f..a882dd20be 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -322,62 +322,88 @@ static inline struct rte_mbuf *bnxt_tpa_end(
 	return mbuf;
 }
 
-static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM] __rte_cache_aligned;
+
+static void __rte_cold
+bnxt_init_ptype_table(void)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+	uint32_t *pt = bnxt_ptype_table;
+	static bool initialized;
+	int ip6, tun, type;
+	uint32_t l3;
+	int i;
 
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
+	if (initialized)
+		return;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
+	for (i = 0; i < BNXT_PTYPE_TBL_DIM; i++) {
+		if (i & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN >> 2))
+			pt[i] = RTE_PTYPE_L2_ETHER_VLAN;
 		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
+			pt[i] = RTE_PTYPE_L2_ETHER;
+
+		ip6 = i & (RX_PKT_CMPL_FLAGS2_IP_TYPE >> 7);
+		tun = i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC >> 2);
+		type = (i & 0x38) << 9;
+
+		if (!tun && !ip6)
+			l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+		else if (!tun && ip6)
+			l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+		else if (tun && !ip6)
+			l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
 		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
+			l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+
+		switch (type) {
+		case RX_PKT_CMPL_FLAGS_ITYPE_ICMP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_ICMP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_ICMP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_TCP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_TCP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_TCP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_UDP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_UDP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_UDP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_IP:
+			pt[i] |= l3;
+			break;
+		}
 	}
+	initialized = true;
+}
+
+static uint32_t
+bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+{
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	return pkt_type;
+	flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+	flags2 = rte_le_to_cpu_32(rxcmp1->flags2);
+
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
+
+	return bnxt_ptype_table[index];
 }
 
 #ifdef RTE_LIBRTE_IEEE1588
@@ -1046,6 +1072,9 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 	unsigned int i;
 	uint16_t size;
 
+	/* Initialize packet type table. */
+	bnxt_init_ptype_table();
+
 	size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM;
 	size = RTE_MIN(BNXT_MAX_PKT_LEN, size);
 
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 5b9b5f3108..0e21c8f900 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -238,4 +238,6 @@ void bnxt_set_mark_in_mbuf(struct bnxt *bp,
 #define BNXT_CFA_META_EEM_TCAM_SHIFT		31
 #define BNXT_CFA_META_EM_TEST(x) ((x) >> BNXT_CFA_META_EEM_TCAM_SHIFT)
 
+#define BNXT_PTYPE_TBL_DIM	128
+extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM];
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 7f3eabcda1..fade67ec8e 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -93,61 +93,27 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 }
 
 static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+bnxt_parse_pkt_type(uint32x4_t mm_rxcmp, uint32x4_t mm_rxcmp1)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
-
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
+	flags_type = vgetq_lane_u32(mm_rxcmp, 0);
+	flags2 = (uint16_t)vgetq_lane_u32(mm_rxcmp1, 0);
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
-	}
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
 
-	return pkt_type;
+	return bnxt_ptype_table[index];
 }
 
 static void
@@ -234,10 +200,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 	/* Handle RX burst request */
 	for (i = 0; i < nb_pkts; i++) {
+		uint32x4_t mm_rxcmp, mm_rxcmp1;
 		struct rx_pkt_cmpl_hi *rxcmp1;
 		struct rte_mbuf *mbuf;
-		uint64x2_t mm_rxcmp;
-		uint8x16_t pkt_mb;
+		uint32x4_t pkt_mb;
+		uint8x16_t tmp;
+		uint32_t ptype;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -247,6 +215,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
+		mm_rxcmp = vld1q_u32((uint32_t *)rxcmp);
+		mm_rxcmp1 = vld1q_u32((uint32_t *)rxcmp);
 		raw_cons += 2;
 		cons = rxcmp->opaque;
 
@@ -258,10 +228,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-		mm_rxcmp = vld1q_u64((uint64_t *)rxcmp);
-		pkt_mb = vqtbl1q_u8(vreinterpretq_u8_u64(mm_rxcmp), shuf_msk);
-		vst1q_u64((uint64_t *)&mbuf->rx_descriptor_fields1,
-			  vreinterpretq_u64_u8(pkt_mb));
+		tmp = vqtbl1q_u8(vreinterpretq_u8_u32(mm_rxcmp), shuf_msk);
+		pkt_mb = vreinterpretq_u32_u8(tmp);
+		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
+		pkt_mb = vsetq_lane_u32(ptype, pkt_mb, 0);
+
+		vst1q_u32((uint32_t *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
 		rte_compiler_barrier();
 
@@ -279,8 +251,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		bnxt_parse_csum(mbuf, rxcmp1);
-		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index eced74e4e3..69ffbe4cc9 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -96,62 +96,28 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 	rxq->rxrearm_nb -= nb;
 }
 
-static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+static __m128i
+bnxt_parse_pkt_type(__m128i mm_rxcmp, __m128i mm_rxcmp1)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
-
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
+	flags_type = _mm_extract_epi16(mm_rxcmp, 0);
+	flags2 = _mm_extract_epi32(mm_rxcmp1, 0);
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
-	}
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
 
-	return pkt_type;
+	return _mm_set_epi32(0, 0, 0, bnxt_ptype_table[index]);
 }
 
 static void
@@ -242,7 +208,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	for (i = 0; i < nb_pkts; i++) {
 		struct rx_pkt_cmpl_hi *rxcmp1;
 		struct rte_mbuf *mbuf;
-		__m128i mm_rxcmp, pkt_mb;
+		__m128i mm_rxcmp, mm_rxcmp1, pkt_mb, ptype;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -252,6 +218,9 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
+		mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
+		mm_rxcmp1 = _mm_load_si128((__m128i *)rxcmp1);
+
 		raw_cons += 2;
 		cons = rxcmp->opaque;
 
@@ -263,8 +232,10 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		_mm_store_si128((__m128i *)&mbuf->rearm_data, mbuf_init);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-		mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
 		pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
+		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
+		pkt_mb = _mm_blend_epi16(pkt_mb, ptype, 0x3);
+
 		_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
 		rte_compiler_barrier();
@@ -283,8 +254,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		bnxt_parse_csum(mbuf, rxcmp1);
-		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
 
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * [dpdk-dev] [PATCH 09/12] net/bnxt: table-based handling for ol flags
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (7 preceding siblings ...)
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 08/12] net/bnxt: use table-based packet type translation Lance Richardson
@ 2020-09-09 15:53 ` Lance Richardson
  2020-09-11  3:42 ` [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Ajit Khaparde
  9 siblings, 0 replies; 22+ messages in thread
From: Lance Richardson @ 2020-09-09 15:53 UTC (permalink / raw)
  To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Use table to translate receive descriptor status flags to
rte_mbuf ol_flags values.
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxr.c           | 166 ++++++++++++++++----------
 drivers/net/bnxt/bnxt_rxr.h           |   6 +
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c |  99 ++++++---------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  |  96 ++++++---------
 4 files changed, 181 insertions(+), 186 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index a882dd20be..33bd006530 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -406,6 +406,95 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
 	return bnxt_ptype_table[index];
 }
 
+uint32_t
+bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM] __rte_cache_aligned;
+
+uint32_t
+bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM] __rte_cache_aligned;
+
+static void __rte_cold
+bnxt_init_ol_flags_tables(void)
+{
+	static bool initialized;
+	uint32_t *pt;
+	int i;
+
+	if (initialized)
+		return;
+
+	/* Initialize ol_flags table. */
+	pt = bnxt_ol_flags_table;
+	for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) {
+		pt[i] = 0;
+		if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN)
+			pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+
+		if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC)
+			pt[i] |= PKT_RX_IP_CKSUM_GOOD;
+
+		if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC)
+			pt[i] |= PKT_RX_L4_CKSUM_GOOD;
+
+		if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC)
+			pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD;
+	}
+
+	/* Initialize checksum error table. */
+	pt = bnxt_ol_flags_err_table;
+	for (i = 0; i < BNXT_OL_FLAGS_ERR_TBL_DIM; i++) {
+		pt[i] = 0;
+		if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4))
+			pt[i] |= PKT_RX_IP_CKSUM_BAD;
+
+		if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4))
+			pt[i] |= PKT_RX_L4_CKSUM_BAD;
+
+		if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4))
+			pt[i] |= PKT_RX_EIP_CKSUM_BAD;
+
+		if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4))
+			pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD;
+	}
+
+	initialized = true;
+}
+
+static void
+bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1,
+		  struct rte_mbuf *mbuf)
+{
+	uint16_t flags_type, errors, flags;
+	uint64_t ol_flags;
+
+	flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+
+	flags = rte_le_to_cpu_32(rxcmp1->flags2) &
+				(RX_PKT_CMPL_FLAGS2_IP_CS_CALC |
+				 RX_PKT_CMPL_FLAGS2_L4_CS_CALC |
+				 RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC |
+				 RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC |
+				 RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN);
+
+	errors = rte_le_to_cpu_16(rxcmp1->errors_v2) &
+				(RX_PKT_CMPL_ERRORS_IP_CS_ERROR |
+				 RX_PKT_CMPL_ERRORS_L4_CS_ERROR |
+				 RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR |
+				 RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR);
+	errors = (errors >> 4) & flags;
+
+	ol_flags = bnxt_ol_flags_table[flags & ~errors];
+
+	if (errors)
+		ol_flags |= bnxt_ol_flags_err_table[errors];
+
+	if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
+		mbuf->hash.rss = rte_le_to_cpu_32(rxcmp->rss_hash);
+		ol_flags |= PKT_RX_RSS_HASH;
+	}
+
+	mbuf->ol_flags = ol_flags;
+}
+
 #ifdef RTE_LIBRTE_IEEE1588
 static void
 bnxt_get_rx_ts_thor(struct bnxt *bp, uint32_t rx_ts_cmpl)
@@ -583,8 +672,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	int rc = 0;
 	uint8_t agg_buf = 0;
 	uint16_t cmp_type;
-	uint32_t flags2_f = 0, vfr_flag = 0, mark_id = 0;
-	uint16_t flags_type;
+	uint32_t vfr_flag = 0, mark_id = 0;
 	struct bnxt *bp = rxq->bp;
 
 	rxcmp = (struct rx_pkt_cmpl *)
@@ -653,13 +741,17 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	mbuf->pkt_len = rxcmp->len;
 	mbuf->data_len = mbuf->pkt_len;
 	mbuf->port = rxq->port_id;
-	mbuf->ol_flags = 0;
 
-	flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
-	if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
-		mbuf->hash.rss = rxcmp->rss_hash;
-		mbuf->ol_flags |= PKT_RX_RSS_HASH;
+	bnxt_set_ol_flags(rxcmp, rxcmp1, mbuf);
+
+#ifdef RTE_LIBRTE_IEEE1588
+	if (unlikely((rte_le_to_cpu_16(rxcmp->flags_type) &
+		      RX_PKT_CMPL_FLAGS_MASK) ==
+		      RX_PKT_CMPL_FLAGS_ITYPE_PTP_W_TIMESTAMP)) {
+		mbuf->ol_flags |= PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST;
+		bnxt_get_rx_ts_thor(rxq->bp, rxcmp1->reorder);
 	}
+#endif
 
 	if (BNXT_TRUFLOW_EN(bp))
 		mark_id = bnxt_ulp_set_mark_in_mbuf(rxq->bp, rxcmp1, mbuf,
@@ -667,66 +759,9 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	else
 		bnxt_set_mark_in_mbuf(rxq->bp, rxcmp1, mbuf);
 
-#ifdef RTE_LIBRTE_IEEE1588
-	if (unlikely((flags_type & RX_PKT_CMPL_FLAGS_MASK) ==
-		     RX_PKT_CMPL_FLAGS_ITYPE_PTP_W_TIMESTAMP)) {
-		mbuf->ol_flags |= PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST;
-		bnxt_get_rx_ts_thor(rxq->bp, rxcmp1->reorder);
-	}
-#endif
 	if (agg_buf)
 		bnxt_rx_pages(rxq, mbuf, &tmp_raw_cons, agg_buf, NULL);
 
-	if (rxcmp1->flags2 & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
-		mbuf->vlan_tci = rxcmp1->metadata &
-			(RX_PKT_CMPL_METADATA_VID_MASK |
-			RX_PKT_CMPL_METADATA_DE |
-			RX_PKT_CMPL_METADATA_PRI_MASK);
-		mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-	}
-
-	flags2_f = flags2_0xf(rxcmp1);
-	/* IP Checksum */
-	if (likely(IS_IP_NONTUNNEL_PKT(flags2_f))) {
-		if (unlikely(RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	} else if (IS_IP_TUNNEL_PKT(flags2_f)) {
-		if (unlikely(RX_CMP_IP_OUTER_CS_ERROR(rxcmp1) ||
-			     RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	}
-
-	/* L4 Checksum */
-	if (likely(IS_L4_NONTUNNEL_PKT(flags2_f))) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-	} else if (IS_L4_TUNNEL_PKT(flags2_f)) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-		if (unlikely(RX_CMP_L4_OUTER_CS_ERR2(rxcmp1))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
-		} else if (unlikely(IS_L4_TUNNEL_PKT_ONLY_INNER_L4_CS
-				    (flags2_f))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN;
-		} else {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
-		}
-	} else if (unlikely(RX_CMP_L4_CS_UNKNOWN(rxcmp1))) {
-		mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
-	}
-
 	mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
 
 #ifdef BNXT_DEBUG
@@ -1075,6 +1110,9 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 	/* Initialize packet type table. */
 	bnxt_init_ptype_table();
 
+	/* Initialize offload flags parsing table. */
+	bnxt_init_ol_flags_tables();
+
 	size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM;
 	size = RTE_MIN(BNXT_MAX_PKT_LEN, size);
 
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 0e21c8f900..4f5e23b855 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -240,4 +240,10 @@ void bnxt_set_mark_in_mbuf(struct bnxt *bp,
 
 #define BNXT_PTYPE_TBL_DIM	128
 extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM];
+
+#define BNXT_OL_FLAGS_TBL_DIM	32
+extern uint32_t bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM];
+
+#define BNXT_OL_FLAGS_ERR_TBL_DIM 16
+extern uint32_t bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM];
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index fade67ec8e..37b8c83656 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -116,50 +116,28 @@ bnxt_parse_pkt_type(uint32x4_t mm_rxcmp, uint32x4_t mm_rxcmp1)
 	return bnxt_ptype_table[index];
 }
 
-static void
-bnxt_parse_csum(struct rte_mbuf *mbuf, struct rx_pkt_cmpl_hi *rxcmp1)
+static uint32_t
+bnxt_set_ol_flags(uint32x4_t mm_rxcmp, uint32x4_t mm_rxcmp1)
 {
-	uint32_t flags;
+	uint16_t flags_type, errors, flags;
+	uint32_t ol_flags;
 
-	flags = flags2_0xf(rxcmp1);
-	/* IP Checksum */
-	if (likely(IS_IP_NONTUNNEL_PKT(flags))) {
-		if (unlikely(RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	} else if (IS_IP_TUNNEL_PKT(flags)) {
-		if (unlikely(RX_CMP_IP_OUTER_CS_ERROR(rxcmp1) ||
-			     RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	} else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1))) {
-		mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
-	}
+	/* Extract rxcmp1->flags2. */
+	flags = vgetq_lane_u32(mm_rxcmp1, 0) & 0x1F;
+	/* Extract rxcmp->flags_type. */
+	flags_type = vgetq_lane_u32(mm_rxcmp, 0);
+	/* Extract rxcmp1->errors_v2. */
+	errors = (vgetq_lane_u32(mm_rxcmp1, 2) >> 4) & flags & 0xF;
 
-	/* L4 Checksum */
-	if (likely(IS_L4_NONTUNNEL_PKT(flags))) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-	} else if (IS_L4_TUNNEL_PKT(flags)) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-		if (unlikely(RX_CMP_L4_OUTER_CS_ERR2(rxcmp1))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
-		} else if (unlikely(IS_L4_TUNNEL_PKT_ONLY_INNER_L4_CS
-				    (flags))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN;
-		} else {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
-		}
-	} else if (unlikely(RX_CMP_L4_CS_UNKNOWN(rxcmp1))) {
-		mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
-	}
+	ol_flags = bnxt_ol_flags_table[flags & ~errors];
+
+	if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
+		ol_flags |= PKT_RX_RSS_HASH;
+
+	if (errors)
+		ol_flags |= bnxt_ol_flags_err_table[errors];
+
+	return ol_flags;
 }
 
 uint16_t
@@ -202,10 +180,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	for (i = 0; i < nb_pkts; i++) {
 		uint32x4_t mm_rxcmp, mm_rxcmp1;
 		struct rx_pkt_cmpl_hi *rxcmp1;
+		uint32x4_t pkt_mb, rearm;
+		uint32_t ptype, ol_flags;
 		struct rte_mbuf *mbuf;
-		uint32x4_t pkt_mb;
+		uint16_t vlan_tci;
+		uint16x8_t tmp16;
 		uint8x16_t tmp;
-		uint32_t ptype;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -224,33 +204,30 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rte_prefetch0(mbuf);
 		rxr->rx_buf_ring[cons] = NULL;
 
-		/* Set constant fields from mbuf initializer. */
-		vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
+		/* Set fields from mbuf initializer and ol_flags. */
+		ol_flags = bnxt_set_ol_flags(mm_rxcmp, mm_rxcmp1);
+		rearm = vsetq_lane_u32(ol_flags,
+				       vreinterpretq_u32_u64(mbuf_init), 2);
+		vst1q_u32((uint32_t *)&mbuf->rearm_data, rearm);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
 		tmp = vqtbl1q_u8(vreinterpretq_u8_u32(mm_rxcmp), shuf_msk);
 		pkt_mb = vreinterpretq_u32_u8(tmp);
+
+		/* Set packet type. */
 		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
 		pkt_mb = vsetq_lane_u32(ptype, pkt_mb, 0);
 
-		vst1q_u32((uint32_t *)&mbuf->rx_descriptor_fields1, pkt_mb);
+		/* Set vlan_tci. */
+		vlan_tci = vgetq_lane_u32(mm_rxcmp1, 1);
+		tmp16 = vsetq_lane_u16(vlan_tci,
+				       vreinterpretq_u16_u32(pkt_mb),
+				       5);
+		pkt_mb = vreinterpretq_u32_u16(tmp16);
 
-		rte_compiler_barrier();
-
-		if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
-			mbuf->ol_flags |= PKT_RX_RSS_HASH;
-
-		if (rxcmp1->flags2 &
-		    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
-			mbuf->vlan_tci = rxcmp1->metadata &
-				(RX_PKT_CMPL_METADATA_VID_MASK |
-				RX_PKT_CMPL_METADATA_DE |
-				RX_PKT_CMPL_METADATA_PRI_MASK);
-			mbuf->ol_flags |=
-				PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-		}
+		/* Store descriptor fields. */
+		vst1q_u32((uint32_t *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
-		bnxt_parse_csum(mbuf, rxcmp1);
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 69ffbe4cc9..761d835963 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -120,50 +120,28 @@ bnxt_parse_pkt_type(__m128i mm_rxcmp, __m128i mm_rxcmp1)
 	return _mm_set_epi32(0, 0, 0, bnxt_ptype_table[index]);
 }
 
-static void
-bnxt_parse_csum(struct rte_mbuf *mbuf, struct rx_pkt_cmpl_hi *rxcmp1)
+static __m128i
+bnxt_set_ol_flags(__m128i mm_rxcmp, __m128i mm_rxcmp1)
 {
-	uint32_t flags;
+	uint16_t flags_type, errors, flags;
+	uint32_t ol_flags;
 
-	flags = flags2_0xf(rxcmp1);
-	/* IP Checksum */
-	if (likely(IS_IP_NONTUNNEL_PKT(flags))) {
-		if (unlikely(RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	} else if (IS_IP_TUNNEL_PKT(flags)) {
-		if (unlikely(RX_CMP_IP_OUTER_CS_ERROR(rxcmp1) ||
-			     RX_CMP_IP_CS_ERROR(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-	} else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1))) {
-		mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
-	}
+	/* Extract rxcmp1->flags2. */
+	flags = _mm_extract_epi32(mm_rxcmp1, 0) & 0x1F;
+	/* Extract rxcmp->flags_type. */
+	flags_type = _mm_extract_epi16(mm_rxcmp, 0);
+	/* Extract rxcmp1->errors_v2. */
+	errors = (_mm_extract_epi16(mm_rxcmp1, 4) >> 4) & flags & 0xF;
 
-	/* L4 Checksum */
-	if (likely(IS_L4_NONTUNNEL_PKT(flags))) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-	} else if (IS_L4_TUNNEL_PKT(flags)) {
-		if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-		else
-			mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
-		if (unlikely(RX_CMP_L4_OUTER_CS_ERR2(rxcmp1))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
-		} else if (unlikely(IS_L4_TUNNEL_PKT_ONLY_INNER_L4_CS
-				    (flags))) {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN;
-		} else {
-			mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
-		}
-	} else if (unlikely(RX_CMP_L4_CS_UNKNOWN(rxcmp1))) {
-		mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
-	}
+	ol_flags = bnxt_ol_flags_table[flags & ~errors];
+
+	if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
+		ol_flags |= PKT_RX_RSS_HASH;
+
+	if (errors)
+		ol_flags |= bnxt_ol_flags_err_table[errors];
+
+	return _mm_set_epi64x(ol_flags, 0);
 }
 
 uint16_t
@@ -208,7 +186,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	for (i = 0; i < nb_pkts; i++) {
 		struct rx_pkt_cmpl_hi *rxcmp1;
 		struct rte_mbuf *mbuf;
-		__m128i mm_rxcmp, mm_rxcmp1, pkt_mb, ptype;
+		__m128i mm_rxcmp, mm_rxcmp1, pkt_mb, ptype, rearm;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -225,35 +203,31 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		cons = rxcmp->opaque;
 
 		mbuf = rxr->rx_buf_ring[cons];
-		rte_prefetch0(mbuf);
 		rxr->rx_buf_ring[cons] = NULL;
 
-		/* Set constant fields from mbuf initializer. */
-		_mm_store_si128((__m128i *)&mbuf->rearm_data, mbuf_init);
+		/* Set fields from mbuf initializer and ol_flags. */
+		rearm = _mm_or_si128(mbuf_init,
+				     bnxt_set_ol_flags(mm_rxcmp, mm_rxcmp1));
+		_mm_store_si128((__m128i *)&mbuf->rearm_data, rearm);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
 		pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
+
+		/* Set packet type. */
 		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
 		pkt_mb = _mm_blend_epi16(pkt_mb, ptype, 0x3);
 
-		_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);
+		/*
+		 * Shift vlan_tci from completion metadata field left six
+		 * bytes and blend into mbuf->rx_descriptor_fields1 to set
+		 * mbuf->vlan_tci.
+		 */
+		pkt_mb = _mm_blend_epi16(pkt_mb,
+					 _mm_slli_si128(mm_rxcmp1, 6), 0x20);
 
-		rte_compiler_barrier();
-
-		if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
-			mbuf->ol_flags |= PKT_RX_RSS_HASH;
-
-		if (rxcmp1->flags2 &
-		    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
-			mbuf->vlan_tci = rxcmp1->metadata &
-				(RX_PKT_CMPL_METADATA_VID_MASK |
-				RX_PKT_CMPL_METADATA_DE |
-				RX_PKT_CMPL_METADATA_PRI_MASK);
-			mbuf->ol_flags |=
-				PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-		}
+		/* Store descriptor fields. */
+		_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
-		bnxt_parse_csum(mbuf, rxcmp1);
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
 
-- 
2.25.1
^ permalink raw reply	[flat|nested] 22+ messages in thread
- * Re: [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements
  2020-09-09 15:52 [dpdk-dev] [PATCH 00/12] net/bnxt: vector PMD improvements Lance Richardson
                   ` (8 preceding siblings ...)
  2020-09-09 15:53 ` [dpdk-dev] [PATCH 09/12] net/bnxt: table-based handling for ol flags Lance Richardson
@ 2020-09-11  3:42 ` Ajit Khaparde
  2020-09-11 15:58   ` Ferruh Yigit
  9 siblings, 1 reply; 22+ messages in thread
From: Ajit Khaparde @ 2020-09-11  3:42 UTC (permalink / raw)
  To: Lance Richardson; +Cc: dpdk-dev
On Wed, Sep 9, 2020 at 8:53 AM Lance Richardson <
lance.richardson@broadcom.com> wrote:
> Fixes and optimizations to improve bnxt vector mode
> performance and functionality.
>
Patchset applied to dpdk-next-net-brcm. Thanks
>
> Lance Richardson (12):
>   net/bnxt: fix burst mode get for Arm
>   net/bnxt: fix rxq/txq get information
>   net/bnxt: use appropriate type for Rx mbuf ring
>   net/bnxt: require async cq for vector mode
>   net/bnxt: improve support for small ring sizes
>   net/bnxt: use smaller cq when agg ring not needed
>   net/bnxt: increase max burst size for vector mode
>   net/bnxt: use table-based packet type translation
>   net/bnxt: table-based handling for ol flags
>   net/bnxt: optimize vector mode mbuf allocation
>   net/bnxt: handle multiple packets per loop in vector PMD
>   net/bnxt: transmit vector mode improvements
>
>  drivers/net/bnxt/bnxt.h                 |  19 +-
>  drivers/net/bnxt/bnxt_ethdev.c          |  85 ++--
>  drivers/net/bnxt/bnxt_reps.c            |  21 +-
>  drivers/net/bnxt/bnxt_ring.c            |   4 +-
>  drivers/net/bnxt/bnxt_rxq.c             |  21 +-
>  drivers/net/bnxt/bnxt_rxq.h             |   1 +
>  drivers/net/bnxt/bnxt_rxr.c             | 362 ++++++++------
>  drivers/net/bnxt/bnxt_rxr.h             |  16 +-
>  drivers/net/bnxt/bnxt_rxtx_vec_common.h |  52 +-
>  drivers/net/bnxt/bnxt_rxtx_vec_neon.c   | 524 +++++++++++----------
>  drivers/net/bnxt/bnxt_rxtx_vec_sse.c    | 600 ++++++++++++------------
>  drivers/net/bnxt/bnxt_txq.c             |   7 +-
>  12 files changed, 964 insertions(+), 748 deletions(-)
>
> --
> 2.25.1
>
>
^ permalink raw reply	[flat|nested] 22+ messages in thread