patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Yongseok Koh <yskoh@mellanox.com>
To: yliu@fridaylinux.org
Cc: stable@dpdk.org, shahafs@mellanox.com,
	adrien.mazarguil@6wind.com, nelio.laranjeiro@6wind.com,
	Yongseok Koh <yskoh@mellanox.com>
Subject: [dpdk-stable] [PATCH 03/67] net/mlx5: fix synchronization on polling Rx completions
Date: Mon,  4 Jun 2018 17:10:25 -0700	[thread overview]
Message-ID: <20180605001129.13184-4-yskoh@mellanox.com> (raw)
In-Reply-To: <20180605001129.13184-1-yskoh@mellanox.com>

[ backported from upstream commit 1742c2d9fab07e66209f2d14e7daa50829fc4423 ]

Polling a new packet is basically sensing the generation bit in a
completion entry. For some processors not having strongly-ordered memory
model, there has to be a memory barrier between reading the generation bit
and other fields of the entry in order to guarantee data is not stale.

Fixes: 570acdb1da8a ("net/mlx5: add vectorized Rx/Tx burst for ARM")
Cc: stable@dpdk.org

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx.c          |  1 +
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 53 ++++++++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  2 +-
 3 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9f1478cdb..c2dab1f4c 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1717,6 +1717,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			return 0;
 		++rxq->cq_ci;
 		op_own = cqe->op_own;
+		rte_io_rmb();
 		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
 			volatile struct mlx5_mini_cqe8 (*mc)[8] =
 				(volatile struct mlx5_mini_cqe8 (*)[8])
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 06f83ef14..0b842f9a0 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -806,6 +806,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		uint16x4_t mask;
 		uint16x4_t byte_cnt;
 		uint32x4_t ptype_info, flow_tag;
+		register uint64x2_t c0, c1, c2, c3;
 		uint8_t *p0, *p1, *p2, *p3;
 		uint8_t *e0 = (void *)&elts[pos]->pkt_len;
 		uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len;
@@ -822,6 +823,16 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe);
 		p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe);
 		p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe);
+		/* B.0 (CQE 3) load a block having op_own. */
+		c3 = vld1q_u64((uint64_t *)(p3 + 48));
+		/* B.0 (CQE 2) load a block having op_own. */
+		c2 = vld1q_u64((uint64_t *)(p2 + 48));
+		/* B.0 (CQE 1) load a block having op_own. */
+		c1 = vld1q_u64((uint64_t *)(p1 + 48));
+		/* B.0 (CQE 0) load a block having op_own. */
+		c0 = vld1q_u64((uint64_t *)(p0 + 48));
+		/* Synchronize for loading the rest of blocks. */
+		rte_io_rmb();
 		/* Prefetch next 4 CQEs. */
 		if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
 			unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
@@ -831,50 +842,46 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			rte_prefetch_non_temporal(&cq[next + 3]);
 		}
 		__asm__ volatile (
-		/* B.1 (CQE 3) load a block having op_own. */
-		"ld1 {v19.16b}, [%[p3]] \n\t"
-		"sub %[p3], %[p3], #48 \n\t"
-		/* B.2 (CQE 3) load the rest blocks. */
+		/* B.1 (CQE 3) load the rest of blocks. */
 		"ld1 {v16.16b - v18.16b}, [%[p3]] \n\t"
+		/* B.2 (CQE 3) move the block having op_own. */
+		"mov v19.16b, %[c3].16b \n\t"
 		/* B.3 (CQE 3) extract 16B fields. */
 		"tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+		/* B.1 (CQE 2) load the rest of blocks. */
+		"ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
 		/* B.4 (CQE 3) adjust CRC length. */
 		"sub v23.8h, v23.8h, %[crc_adj].8h \n\t"
-		/* B.1 (CQE 2) load a block having op_own. */
-		"ld1 {v19.16b}, [%[p2]] \n\t"
-		"sub %[p2], %[p2], #48 \n\t"
 		/* C.1 (CQE 3) generate final structure for mbuf. */
 		"tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t"
-		/* B.2 (CQE 2) load the rest blocks. */
-		"ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
+		/* B.2 (CQE 2) move the block having op_own. */
+		"mov v19.16b, %[c2].16b \n\t"
 		/* B.3 (CQE 2) extract 16B fields. */
 		"tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+		/* B.1 (CQE 1) load the rest of blocks. */
+		"ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
 		/* B.4 (CQE 2) adjust CRC length. */
 		"sub v22.8h, v22.8h, %[crc_adj].8h \n\t"
-		/* B.1 (CQE 1) load a block having op_own. */
-		"ld1 {v19.16b}, [%[p1]] \n\t"
-		"sub %[p1], %[p1], #48 \n\t"
 		/* C.1 (CQE 2) generate final structure for mbuf. */
 		"tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t"
-		/* B.2 (CQE 1) load the rest blocks. */
-		"ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
+		/* B.2 (CQE 1) move the block having op_own. */
+		"mov v19.16b, %[c1].16b \n\t"
 		/* B.3 (CQE 1) extract 16B fields. */
 		"tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+		/* B.1 (CQE 0) load the rest of blocks. */
+		"ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
 		/* B.4 (CQE 1) adjust CRC length. */
 		"sub v21.8h, v21.8h, %[crc_adj].8h \n\t"
-		/* B.1 (CQE 0) load a block having op_own. */
-		"ld1 {v19.16b}, [%[p0]] \n\t"
-		"sub %[p0], %[p0], #48 \n\t"
 		/* C.1 (CQE 1) generate final structure for mbuf. */
 		"tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t"
-		/* B.2 (CQE 0) load the rest blocks. */
-		"ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
+		/* B.2 (CQE 0) move the block having op_own. */
+		"mov v19.16b, %[c0].16b \n\t"
+		/* A.1 load mbuf pointers. */
+		"ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
 		/* B.3 (CQE 0) extract 16B fields. */
 		"tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
 		/* B.4 (CQE 0) adjust CRC length. */
 		"sub v20.8h, v20.8h, %[crc_adj].8h \n\t"
-		/* A.1 load mbuf pointers. */
-		"ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
 		/* D.1 extract op_own byte. */
 		"tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t"
 		/* C.2 (CQE 3) adjust flow mark. */
@@ -909,9 +916,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		 [byte_cnt]"=&w"(byte_cnt),
 		 [ptype_info]"=&w"(ptype_info),
 		 [flow_tag]"=&w"(flow_tag)
-		:[p3]"r"(p3 + 48), [p2]"r"(p2 + 48),
-		 [p1]"r"(p1 + 48), [p0]"r"(p0 + 48),
+		:[p3]"r"(p3), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0),
 		 [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+		 [c3]"w"(c3), [c2]"w"(c2), [c1]"w"(c1), [c0]"w"(c0),
 		 [elts_p]"r"(elts_p),
 		 [pkts_p]"r"(pkts_p),
 		 [cqe_shuf_m]"w"(cqe_shuf_m),
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 7ef2c59e1..793142922 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -825,7 +825,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* B.2 copy mbuf pointers. */
 		_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
 		_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
-		rte_compiler_barrier();
+		rte_io_rmb();
 		/* C.1 load remained CQE data and extract necessary fields. */
 		cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
 		cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
-- 
2.11.0

  parent reply	other threads:[~2018-06-05  0:12 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-05  0:10 [dpdk-stable] [PATCH 00/67] net/mlx5: backport patches for v17.11.3 LTS Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 01/67] net/mlx5: remove get priv internal function Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 02/67] net/mlx4: store RSS hash result in mbufs Yongseok Koh
2018-06-05  0:10 ` Yongseok Koh [this message]
2018-06-05  0:10 ` [dpdk-stable] [PATCH 04/67] net/mlx5: fix allocation when no memory on device NUMA node Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 05/67] net/mlx5: fix flow director conversion Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 06/67] net/mlx5: fix reception of multiple MAC addresses Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 07/67] net/mlx5: fix secondary process mempool registration Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 08/67] net/mlx5: remove assert un-accessible from secondary process Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 09/67] net/mlx5: warn for unsuccessful memory registration Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 10/67] net/mlx5: map UAR address around huge pages Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 11/67] net/mlx4: fix single port configuration Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 12/67] net/mlx4: fix broadcast Rx Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 13/67] net/mlx4: fix removal detection of stopped port Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 14/67] net/mlx5: fix CRC strip capability query Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 15/67] net/mlx5: fix close after start failure Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 16/67] net/mlx: control netdevices through ioctl only Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 17/67] net/mlx5: fix disabling Tx packet inlining Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 18/67] net/mlx5: fix sriov flag Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 19/67] net/mlx5: name parameters in function prototypes Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 20/67] net/mlx5: mark parameters with unused attribute Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 21/67] net/mlx5: normalize function prototypes Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 22/67] net/mlx5: add missing function documentation Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 23/67] net/mlx5: remove useless empty lines Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 24/67] net/mlx5: remove control path locks Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 25/67] net/mlx5: prefix all functions with mlx5 Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 26/67] net/mlx5: change non failing function return values Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 27/67] net/mlx5: standardize on negative errno values Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 28/67] net/mlx5: use port id in PMD log Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 29/67] net/mlx5: use dynamic logging Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 30/67] net/mlx5: remove kernel version check Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 31/67] net/mlx5: change pkt burst select function prototype Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 32/67] net/mlx5: fix link status behavior Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 33/67] net/mlx5: fix link status to use wait to complete Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 34/67] net/mlx5: change tunnel flow priority Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 35/67] net/mlx5: improve flow error explanation Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 36/67] net/mlx5: refuse empty VLAN flow specification Yongseok Koh
2018-06-05  0:10 ` [dpdk-stable] [PATCH 37/67] net/mlx5: fix icc build Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 38/67] net/mlx5: setup RSS regardless of queue count Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 39/67] net/mlx5: enforce RSS key length limitation Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 40/67] net/mlx5: fix RSS key length query Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 41/67] net/mlx4: fix a typo in header file Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 42/67] net/mlx5: remove 32-bit support Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 43/67] net/mlx5: remove excessive data prefetch Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 44/67] net/mlx5: fix link status initialization Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 45/67] net/mlx4: fix RSS resource leak in case of error Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 46/67] net/mlx5: fix RSS flow action bounds check Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 47/67] net/mlx5: fix invalid flow item check Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 48/67] net/mlx5: split L3/L4 in flow director Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 49/67] net/mlx5: fix flow director mask Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 50/67] net/mlx5: fix flow director rule deletion crash Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 51/67] net/mlx4: fix Rx resource leak in case of error Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 52/67] net/mlx5: fix ethtool link setting call order Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 53/67] net/mlx5: fix socket connection return value Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 54/67] net/mlx5: add data-plane debug message macro Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 55/67] net/mlx5: fix probe return value polarity Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 56/67] net/mlx5: fix flow validation Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 57/67] net/mlx4: fix UDP flow rule limitation enforcement Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 58/67] net/mlx5: fix double free on error handling Yongseok Koh
2018-06-05  0:11 ` [dpdk-stable] [PATCH 59/67] net/mlx5: fix resource leak in case of error Yongseok Koh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180605001129.13184-4-yskoh@mellanox.com \
    --to=yskoh@mellanox.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=nelio.laranjeiro@6wind.com \
    --cc=shahafs@mellanox.com \
    --cc=stable@dpdk.org \
    --cc=yliu@fridaylinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).