* [dpdk-stable] [PATCH 3/4] net/mlx5: optimize Tx doorbell write
[not found] <20181115102859.10928-1-yskoh@mellanox.com>
@ 2018-11-15 10:29 ` Yongseok Koh
2018-11-15 10:29 ` [dpdk-stable] [PATCH 4/4] net/mlx5: optimize Rx buffer replenishment threshold Yongseok Koh
1 sibling, 0 replies; 2+ messages in thread
From: Yongseok Koh @ 2018-11-15 10:29 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: dev, Yongseok Koh, stable
Unnecessary volatile attribute keeps compiler from further optimizing the
code and this results in a little performance drop (~2%). Because of memory
barriers, it is safe to remove.
Fixes: 6bf10ab69be0 ("net/mlx5: support 32-bit systems")
Cc: stable@dpdk.org
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.h | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 59fb43fefe..e210453fe0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -379,17 +379,16 @@ uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
* Address of the lock to use for that UAR access.
*/
static __rte_always_inline void
-__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
+__mlx5_uar_write64_relaxed(uint64_t val, void *addr,
rte_spinlock_t *lock __rte_unused)
{
#ifdef RTE_ARCH_64
- rte_write64_relaxed(val, addr);
+ *(uint64_t *)addr = val;
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(lock);
- rte_write32_relaxed(val, addr);
+ *(uint32_t *)addr = val;
rte_io_wmb();
- rte_write32_relaxed(val >> 32,
- (volatile void *)((volatile char *)addr + 4));
+ *((uint32_t *)addr + 1) = val >> 32;
rte_spinlock_unlock(lock);
#endif
}
@@ -407,7 +406,7 @@ __mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
* Address of the lock to use for that UAR access.
*/
static __rte_always_inline void
-__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
+__mlx5_uar_write64(uint64_t val, void *addr, rte_spinlock_t *lock)
{
rte_io_wmb();
__mlx5_uar_write64_relaxed(val, addr, lock);
--
2.11.0
^ permalink raw reply [flat|nested] 2+ messages in thread
* [dpdk-stable] [PATCH 4/4] net/mlx5: optimize Rx buffer replenishment threshold
[not found] <20181115102859.10928-1-yskoh@mellanox.com>
2018-11-15 10:29 ` [dpdk-stable] [PATCH 3/4] net/mlx5: optimize Tx doorbell write Yongseok Koh
@ 2018-11-15 10:29 ` Yongseok Koh
1 sibling, 0 replies; 2+ messages in thread
From: Yongseok Koh @ 2018-11-15 10:29 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: dev, Yongseok Koh, stable
Due to redundant calculation per every burst, performance drops a little.
Fixes: e10245a13b2e ("net/mlx5: fix Rx buffer replenishment threshold")
Cc: stable@dpdk.org
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_rxq.c | 2 ++
drivers/net/mlx5/mlx5_rxtx.h | 1 +
drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 2 +-
drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 2 +-
4 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b27fc4798d..183da0e282 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1468,6 +1468,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.mp = mp;
tmpl->rxq.stats.idx = idx;
tmpl->rxq.elts_n = log2above(desc);
+ tmpl->rxq.rq_repl_thresh =
+ MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
tmpl->rxq.elts =
(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
#ifndef RTE_ARCH_64
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e210453fe0..f47d327cfb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -101,6 +101,7 @@ struct mlx5_rxq_data {
uint16_t consumed_strd; /* Number of consumed strides in WQE. */
uint32_t rq_pi;
uint32_t cq_ci;
+ uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */
struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
volatile void *wqes;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 0b729f1859..883fe1bf91 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -732,7 +732,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
* N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+ if (repl_n >= rxq->rq_repl_thresh)
mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index e0f95f923d..14117c4bb4 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -716,7 +716,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
* N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+ if (repl_n >= rxq->rq_repl_thresh)
mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
--
2.11.0
^ permalink raw reply [flat|nested] 2+ messages in thread