DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/octeontx2: perf improvement to tx vector func
@ 2020-01-11 13:54 pbhagavatula
  2020-01-11 13:57 ` [dpdk-dev] [PATCH v2] net/octeontx2: perf improvement to Tx " pbhagavatula
  0 siblings, 1 reply; 3+ messages in thread
From: pbhagavatula @ 2020-01-11 13:54 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use scalar loads instead of vector loads for fields
that don't need any vector operations.

Change-Id: I846731cd731d347a7f7e6a2e01d89dfa189ac244
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Reviewed-on: https://sj1git1.cavium.com/21243
Tested-by: sa_ip-toolkits-Jenkins
Tested-by: sa_ip-sw-jenkins <sa_ip-sw-jenkins@marvell.com>
Reviewed-by: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
Reviewed-by: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
---
 drivers/net/octeontx2/otx2_tx.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index fa533000e..9f5926560 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -112,7 +112,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64x2_t ltypes01, ltypes23;
 	uint64x2_t xtmp128, ytmp128;
 	uint64x2_t xmask01, xmask23;
-	uint64x2_t mbuf01, mbuf23;
 	uint64x2_t cmd00, cmd01;
 	uint64x2_t cmd10, cmd11;
 	uint64x2_t cmd20, cmd21;
@@ -137,9 +136,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	sgdesc23_w0 = sgdesc01_w0;
 
 	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
-		mbuf01 = vld1q_u64((uint64_t *)tx_pkts);
-		mbuf23 = vld1q_u64((uint64_t *)(tx_pkts + 2));
-
 		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
 		senddesc01_w0 = vbicq_u64(senddesc01_w0,
 					  vdupq_n_u64(0xFFFFFFFF));
@@ -149,13 +145,11 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc23_w0 = senddesc01_w0;
 		sgdesc23_w0 = sgdesc01_w0;
 
-		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
-
 		/* Move mbufs to iova */
-		mbuf0 = (uint64_t *)vgetq_lane_u64(mbuf01, 0);
-		mbuf1 = (uint64_t *)vgetq_lane_u64(mbuf01, 1);
-		mbuf2 = (uint64_t *)vgetq_lane_u64(mbuf23, 0);
-		mbuf3 = (uint64_t *)vgetq_lane_u64(mbuf23, 1);
+		mbuf0 = (uint64_t *)tx_pkts[0];
+		mbuf1 = (uint64_t *)tx_pkts[1];
+		mbuf2 = (uint64_t *)tx_pkts[2];
+		mbuf3 = (uint64_t *)tx_pkts[3];
 
 		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
 				     offsetof(struct rte_mbuf, buf_iova));
@@ -927,6 +921,7 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 			lmt_status = otx2_lmt_submit(io_addr);
 
 		} while (lmt_status == 0);
+		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
 	}
 
 	return pkts;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [dpdk-dev] [PATCH v2] net/octeontx2: perf improvement to Tx vector func
  2020-01-11 13:54 [dpdk-dev] [PATCH] net/octeontx2: perf improvement to tx vector func pbhagavatula
@ 2020-01-11 13:57 ` pbhagavatula
  2020-01-14  6:27   ` Jerin Jacob
  0 siblings, 1 reply; 3+ messages in thread
From: pbhagavatula @ 2020-01-11 13:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use scalar loads instead of vector loads for fields
that don't need any vector operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/net/octeontx2/otx2_tx.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index fa533000e..9f5926560 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -112,7 +112,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64x2_t ltypes01, ltypes23;
 	uint64x2_t xtmp128, ytmp128;
 	uint64x2_t xmask01, xmask23;
-	uint64x2_t mbuf01, mbuf23;
 	uint64x2_t cmd00, cmd01;
 	uint64x2_t cmd10, cmd11;
 	uint64x2_t cmd20, cmd21;
@@ -137,9 +136,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	sgdesc23_w0 = sgdesc01_w0;
 
 	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
-		mbuf01 = vld1q_u64((uint64_t *)tx_pkts);
-		mbuf23 = vld1q_u64((uint64_t *)(tx_pkts + 2));
-
 		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
 		senddesc01_w0 = vbicq_u64(senddesc01_w0,
 					  vdupq_n_u64(0xFFFFFFFF));
@@ -149,13 +145,11 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc23_w0 = senddesc01_w0;
 		sgdesc23_w0 = sgdesc01_w0;
 
-		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
-
 		/* Move mbufs to iova */
-		mbuf0 = (uint64_t *)vgetq_lane_u64(mbuf01, 0);
-		mbuf1 = (uint64_t *)vgetq_lane_u64(mbuf01, 1);
-		mbuf2 = (uint64_t *)vgetq_lane_u64(mbuf23, 0);
-		mbuf3 = (uint64_t *)vgetq_lane_u64(mbuf23, 1);
+		mbuf0 = (uint64_t *)tx_pkts[0];
+		mbuf1 = (uint64_t *)tx_pkts[1];
+		mbuf2 = (uint64_t *)tx_pkts[2];
+		mbuf3 = (uint64_t *)tx_pkts[3];
 
 		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
 				     offsetof(struct rte_mbuf, buf_iova));
@@ -927,6 +921,7 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 			lmt_status = otx2_lmt_submit(io_addr);
 
 		} while (lmt_status == 0);
+		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
 	}
 
 	return pkts;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/octeontx2: perf improvement to Tx vector func
  2020-01-11 13:57 ` [dpdk-dev] [PATCH v2] net/octeontx2: perf improvement to Tx " pbhagavatula
@ 2020-01-14  6:27   ` Jerin Jacob
  0 siblings, 0 replies; 3+ messages in thread
From: Jerin Jacob @ 2020-01-14  6:27 UTC (permalink / raw)
  To: Pavan Nikhilesh, Ferruh Yigit
  Cc: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, dpdk-dev

On Sat, Jan 11, 2020 at 7:27 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Use scalar loads instead of vector loads for fields
> that don't need any vector operations.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>


Acked-by: Jerin Jacob <jerinj@marvell.com>
Applied to dpdk-next-net-mrvl/master. Thanks

> ---
>  drivers/net/octeontx2/otx2_tx.c | 15 +++++----------
>  1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
> index fa533000e..9f5926560 100644
> --- a/drivers/net/octeontx2/otx2_tx.c
> +++ b/drivers/net/octeontx2/otx2_tx.c
> @@ -112,7 +112,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>         uint64x2_t ltypes01, ltypes23;
>         uint64x2_t xtmp128, ytmp128;
>         uint64x2_t xmask01, xmask23;
> -       uint64x2_t mbuf01, mbuf23;
>         uint64x2_t cmd00, cmd01;
>         uint64x2_t cmd10, cmd11;
>         uint64x2_t cmd20, cmd21;
> @@ -137,9 +136,6 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>         sgdesc23_w0 = sgdesc01_w0;
>
>         for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
> -               mbuf01 = vld1q_u64((uint64_t *)tx_pkts);
> -               mbuf23 = vld1q_u64((uint64_t *)(tx_pkts + 2));
> -
>                 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
>                 senddesc01_w0 = vbicq_u64(senddesc01_w0,
>                                           vdupq_n_u64(0xFFFFFFFF));
> @@ -149,13 +145,11 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>                 senddesc23_w0 = senddesc01_w0;
>                 sgdesc23_w0 = sgdesc01_w0;
>
> -               tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
> -
>                 /* Move mbufs to iova */
> -               mbuf0 = (uint64_t *)vgetq_lane_u64(mbuf01, 0);
> -               mbuf1 = (uint64_t *)vgetq_lane_u64(mbuf01, 1);
> -               mbuf2 = (uint64_t *)vgetq_lane_u64(mbuf23, 0);
> -               mbuf3 = (uint64_t *)vgetq_lane_u64(mbuf23, 1);
> +               mbuf0 = (uint64_t *)tx_pkts[0];
> +               mbuf1 = (uint64_t *)tx_pkts[1];
> +               mbuf2 = (uint64_t *)tx_pkts[2];
> +               mbuf3 = (uint64_t *)tx_pkts[3];
>
>                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
>                                      offsetof(struct rte_mbuf, buf_iova));
> @@ -927,6 +921,7 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>                         lmt_status = otx2_lmt_submit(io_addr);
>
>                 } while (lmt_status == 0);
> +               tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
>         }
>
>         return pkts;
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-01-14  6:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-11 13:54 [dpdk-dev] [PATCH] net/octeontx2: perf improvement to tx vector func pbhagavatula
2020-01-11 13:57 ` [dpdk-dev] [PATCH v2] net/octeontx2: perf improvement to Tx " pbhagavatula
2020-01-14  6:27   ` Jerin Jacob

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).