* [dpdk-dev] [PATCH] net/mlx5: use C11 atomics in packet scheduling @ 2020-10-27 19:03 Viacheslav Ovsiienko 2020-10-28 10:07 ` [dpdk-dev] [PATCH v2] " Viacheslav Ovsiienko 2020-10-28 12:44 ` [dpdk-dev] [PATCH v3] " Viacheslav Ovsiienko 0 siblings, 2 replies; 4+ messages in thread From: Viacheslav Ovsiienko @ 2020-10-27 19:03 UTC (permalink / raw) To: dev; +Cc: rasland The rte_atomic API is deprecated and needs to be replaced with C11 atomic builtins. Use the relaxed ordering and explicit memory barrier for Clock Queue and timestamps synchronization. Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- drivers/net/mlx5/mlx5.h | 14 +++--- drivers/net/mlx5/mlx5_rxtx.h | 11 +++-- drivers/net/mlx5/mlx5_txpp.c | 107 +++++++++++++++++++++++++------------------ 3 files changed, 76 insertions(+), 56 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 1bd8cd4..bd5b9b4 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -563,8 +563,8 @@ struct mlx5_txpp_wq { /* Tx packet pacing internal timestamp. */ struct mlx5_txpp_ts { - rte_atomic64_t ci_ts; - rte_atomic64_t ts; + uint64_t ci_ts; + uint64_t ts; }; /* Tx packet pacing structure. */ @@ -587,11 +587,11 @@ struct mlx5_dev_txpp { struct mlx5_txpp_ts ts; /* Cached completion id/timestamp. */ uint32_t sync_lost:1; /* ci/timestamp synchronization lost. */ /* Statistics counters. */ - rte_atomic32_t err_miss_int; /* Missed service interrupt. */ - rte_atomic32_t err_rearm_queue; /* Rearm Queue errors. */ - rte_atomic32_t err_clock_queue; /* Clock Queue errors. */ - rte_atomic32_t err_ts_past; /* Timestamp in the past. */ - rte_atomic32_t err_ts_future; /* Timestamp in the distant future. */ + uint64_t err_miss_int; /* Missed service interrupt. */ + uint64_t err_rearm_queue; /* Rearm Queue errors. */ + uint64_t err_clock_queue; /* Clock Queue errors. */ + uint64_t err_ts_past; /* Timestamp in the past. */ + uint64_t err_ts_future; /* Timestamp in the distant future. */ }; /* Supported flex parser profile ID. */ diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 1b5fba4..84eaef7 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -14,7 +14,6 @@ #include <rte_mempool.h> #include <rte_common.h> #include <rte_hexdump.h> -#include <rte_atomic.h> #include <rte_spinlock.h> #include <rte_io.h> #include <rte_bus_pci.h> @@ -677,8 +676,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, * the service thread, data should be re-read. */ rte_compiler_barrier(); - ci = rte_atomic64_read(&sh->txpp.ts.ci_ts); - ts = rte_atomic64_read(&sh->txpp.ts.ts); + ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); + ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) break; @@ -688,7 +687,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts -= ts; if (unlikely(mts >= UINT64_MAX / 2)) { /* We have negative integer, mts is in the past. */ - rte_atomic32_inc(&sh->txpp.err_ts_past); + __atomic_fetch_add(&sh->txpp.err_ts_past, + 1, __ATOMIC_RELAXED); return -1; } tick = sh->txpp.tick; @@ -697,7 +697,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts = (mts + tick - 1) / tick; if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { /* We have mts is too distant future. */ - rte_atomic32_inc(&sh->txpp.err_ts_future); + __atomic_fetch_add(&sh->txpp.err_ts_future, + 1, __ATOMIC_RELAXED); return -1; } mts <<= 64 - MLX5_CQ_INDEX_WIDTH; diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c index 37355fa..6fb51ca 100644 --- a/drivers/net/mlx5/mlx5_txpp.c +++ b/drivers/net/mlx5/mlx5_txpp.c @@ -650,29 +650,34 @@ * update by hardware with soecified rate. We have to * read timestump and WQE completion index atomically. */ -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_X86_64) rte_int128_t src; memset(&src, 0, sizeof(src)); *ts = src; /* if (*from == *ts) *from = *src else *ts = *from; */ - rte_atomic128_cmp_exchange(from, ts, &src, 0, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __atomic_compare_exchange_n((__uint128_t *)from, + (__uint128_t *)ts, + (__uint128_t *)&src, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); #else - rte_atomic64_t *cqe = (rte_atomic64_t *)from; + uint64_t *cqe = (uint64_t *)from; - /* Power architecture does not support 16B compare-and-swap. */ + /* + * Power architecture does not support 16B compare-and-swap. + * ARM implements it in software, code below is more relevant. + */ for (;;) { int64_t tm, op; int64_t *ps; rte_compiler_barrier(); - tm = rte_atomic64_read(cqe + 0); - op = rte_atomic64_read(cqe + 1); + tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED); + op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED); rte_compiler_barrier(); - if (tm != rte_atomic64_read(cqe + 0)) + if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED)) continue; - if (op != rte_atomic64_read(cqe + 1)) + if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED)) continue; ps = (int64_t *)ts; ps[0] = tm; @@ -690,8 +695,8 @@ ci = ci << (64 - MLX5_CQ_INDEX_WIDTH); ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH; rte_compiler_barrier(); - rte_atomic64_set(&sh->txpp.ts.ts, ts); - rte_atomic64_set(&sh->txpp.ts.ci_ts, ci); + __atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED); rte_wmb(); } @@ -713,7 +718,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return; } @@ -758,7 +764,10 @@ if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n) return; MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE); - sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts; + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts, + sh->txpp.ts.ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts, + sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE) sh->txpp.ts_p = 0; if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE) @@ -799,7 +808,8 @@ /* Check whether we have missed interrupts. */ if (cq_ci - wq->cq_ci != 1) { DRV_LOG(DEBUG, "Rearm Queue missed interrupt."); - rte_atomic32_inc(&sh->txpp.err_miss_int); + __atomic_fetch_add(&sh->txpp.err_miss_int, + 1, __ATOMIC_RELAXED); /* Check sync lost on wqe index. */ if (cq_ci - wq->cq_ci >= (((1UL << MLX5_WQ_INDEX_WIDTH) / @@ -814,7 +824,8 @@ /* Fire new requests to Rearm Queue. */ if (error) { DRV_LOG(DEBUG, "Rearm Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_rearm_queue); + __atomic_fetch_add(&sh->txpp.err_rearm_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; } } @@ -877,11 +888,11 @@ int ret; int fd; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + sh->txpp.err_miss_int = 0; + sh->txpp.err_rearm_queue = 0; + sh->txpp.err_clock_queue = 0; + sh->txpp.err_ts_past = 0; + sh->txpp.err_ts_future = 0; /* Attach interrupt handler to process Rearm Queue completions. */ fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); ret = mlx5_os_set_nonblock_channel_fd(fd); @@ -1116,7 +1127,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return -EIO; } @@ -1147,11 +1159,11 @@ int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev) struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_dev_ctx_shared *sh = priv->sh; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + __atomic_store_n(&sh->txpp.err_miss_int, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_rearm_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_clock_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_future, 0, __ATOMIC_RELAXED); return 0; } @@ -1196,19 +1208,21 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, struct mlx5_txpp_ts *tsa, uint16_t idx) { do { - int64_t ts, ci; + uint64_t ts, ci; - ts = rte_atomic64_read(&txpp->tsa[idx].ts); - ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); + ts = __atomic_load_n(&txpp->tsa[idx].ts, __ATOMIC_RELAXED); + ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) + if (__atomic_load_n(&txpp->tsa[idx].ts, + __ATOMIC_RELAXED) != ts) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) + if (__atomic_load_n(&txpp->tsa[idx].ci_ts, + __ATOMIC_RELAXED) != ci) continue; - rte_atomic64_set(&tsa->ts, ts); - rte_atomic64_set(&tsa->ci_ts, ci); + tsa->ts = ts; + tsa->ci_ts = ci; return; } while (true); } @@ -1244,9 +1258,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); if (dci < 0) dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; @@ -1284,9 +1298,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; return (dts > dci) ? dts - dci : dci - dts; @@ -1325,15 +1339,20 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, for (i = 0; i < n_txpp; ++i) stats[n_used + i].id = n_used + i; stats[n_used + 0].value = - rte_atomic32_read(&sh->txpp.err_miss_int); + __atomic_load_n(&sh->txpp.err_miss_int, + __ATOMIC_RELAXED); stats[n_used + 1].value = - rte_atomic32_read(&sh->txpp.err_rearm_queue); + __atomic_load_n(&sh->txpp.err_rearm_queue, + __ATOMIC_RELAXED); stats[n_used + 2].value = - rte_atomic32_read(&sh->txpp.err_clock_queue); + __atomic_load_n(&sh->txpp.err_clock_queue, + __ATOMIC_RELAXED); stats[n_used + 3].value = - rte_atomic32_read(&sh->txpp.err_ts_past); + __atomic_load_n(&sh->txpp.err_ts_past, + __ATOMIC_RELAXED); stats[n_used + 4].value = - rte_atomic32_read(&sh->txpp.err_ts_future); + __atomic_load_n(&sh->txpp.err_ts_future, + __ATOMIC_RELAXED); stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp); stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp); stats[n_used + 7].value = sh->txpp.sync_lost; -- 1.8.3.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* [dpdk-dev] [PATCH v2] net/mlx5: use C11 atomics in packet scheduling 2020-10-27 19:03 [dpdk-dev] [PATCH] net/mlx5: use C11 atomics in packet scheduling Viacheslav Ovsiienko @ 2020-10-28 10:07 ` Viacheslav Ovsiienko 2020-10-28 12:44 ` [dpdk-dev] [PATCH v3] " Viacheslav Ovsiienko 1 sibling, 0 replies; 4+ messages in thread From: Viacheslav Ovsiienko @ 2020-10-28 10:07 UTC (permalink / raw) To: dev; +Cc: rasland The rte_atomic API is deprecated and needs to be replaced with C11 atomic builtins. Use the relaxed ordering and explicit memory barrier for Clock Queue and timestamps synchronization. Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- v1: http://patches.dpdk.org/patch/82362/ v2: fixed the issue of missing __atomic_compare_exchange_16, replaced with local copy of existing DPDK implementation of rte_atomic12_compare_exchange being deprecated drivers/net/mlx5/mlx5.h | 14 ++--- drivers/net/mlx5/mlx5_rxtx.h | 11 ++-- drivers/net/mlx5/mlx5_txpp.c | 130 ++++++++++++++++++++++++++++--------------- 3 files changed, 99 insertions(+), 56 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 1bd8cd4..bd5b9b4 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -563,8 +563,8 @@ struct mlx5_txpp_wq { /* Tx packet pacing internal timestamp. */ struct mlx5_txpp_ts { - rte_atomic64_t ci_ts; - rte_atomic64_t ts; + uint64_t ci_ts; + uint64_t ts; }; /* Tx packet pacing structure. */ @@ -587,11 +587,11 @@ struct mlx5_dev_txpp { struct mlx5_txpp_ts ts; /* Cached completion id/timestamp. */ uint32_t sync_lost:1; /* ci/timestamp synchronization lost. */ /* Statistics counters. */ - rte_atomic32_t err_miss_int; /* Missed service interrupt. */ - rte_atomic32_t err_rearm_queue; /* Rearm Queue errors. */ - rte_atomic32_t err_clock_queue; /* Clock Queue errors. */ - rte_atomic32_t err_ts_past; /* Timestamp in the past. */ - rte_atomic32_t err_ts_future; /* Timestamp in the distant future. */ + uint64_t err_miss_int; /* Missed service interrupt. */ + uint64_t err_rearm_queue; /* Rearm Queue errors. */ + uint64_t err_clock_queue; /* Clock Queue errors. */ + uint64_t err_ts_past; /* Timestamp in the past. */ + uint64_t err_ts_future; /* Timestamp in the distant future. */ }; /* Supported flex parser profile ID. */ diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 1b5fba4..84eaef7 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -14,7 +14,6 @@ #include <rte_mempool.h> #include <rte_common.h> #include <rte_hexdump.h> -#include <rte_atomic.h> #include <rte_spinlock.h> #include <rte_io.h> #include <rte_bus_pci.h> @@ -677,8 +676,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, * the service thread, data should be re-read. */ rte_compiler_barrier(); - ci = rte_atomic64_read(&sh->txpp.ts.ci_ts); - ts = rte_atomic64_read(&sh->txpp.ts.ts); + ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); + ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) break; @@ -688,7 +687,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts -= ts; if (unlikely(mts >= UINT64_MAX / 2)) { /* We have negative integer, mts is in the past. */ - rte_atomic32_inc(&sh->txpp.err_ts_past); + __atomic_fetch_add(&sh->txpp.err_ts_past, + 1, __ATOMIC_RELAXED); return -1; } tick = sh->txpp.tick; @@ -697,7 +697,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts = (mts + tick - 1) / tick; if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { /* We have mts is too distant future. */ - rte_atomic32_inc(&sh->txpp.err_ts_future); + __atomic_fetch_add(&sh->txpp.err_ts_future, + 1, __ATOMIC_RELAXED); return -1; } mts <<= 64 - MLX5_CQ_INDEX_WIDTH; diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c index 37355fa..442c20d 100644 --- a/drivers/net/mlx5/mlx5_txpp.c +++ b/drivers/net/mlx5/mlx5_txpp.c @@ -642,6 +642,32 @@ aq->arm_sn++; } +#if defined(RTE_ARCH_X86_64) +static inline int +mlx5_atomic128_cas(rte_int128_t *dst, + rte_int128_t *exp, + const rte_int128_t *src) +{ + uint8_t res; + + asm volatile (MPLOCKED + "cmpxchg16b %[dst];" + " sete %[res]" + : [dst] "=m" (dst->val[0]), + "=a" (exp->val[0]), + "=d" (exp->val[1]), + [res] "=r" (res) + : "b" (src->val[0]), + "c" (src->val[1]), + "a" (exp->val[0]), + "d" (exp->val[1]), + "m" (dst->val[0]) + : "memory"); + + return res; +} +#endif + static inline void mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts) { @@ -650,29 +676,31 @@ * update by hardware with soecified rate. We have to * read timestump and WQE completion index atomically. */ -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_X86_64) rte_int128_t src; memset(&src, 0, sizeof(src)); *ts = src; /* if (*from == *ts) *from = *src else *ts = *from; */ - rte_atomic128_cmp_exchange(from, ts, &src, 0, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + mlx5_atomic128_cas(from, ts, &src); #else - rte_atomic64_t *cqe = (rte_atomic64_t *)from; + uint64_t *cqe = (uint64_t *)from; - /* Power architecture does not support 16B compare-and-swap. */ + /* + * Power architecture does not support 16B compare-and-swap. + * ARM implements it in software, code below is more relevant. + */ for (;;) { int64_t tm, op; int64_t *ps; rte_compiler_barrier(); - tm = rte_atomic64_read(cqe + 0); - op = rte_atomic64_read(cqe + 1); + tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED); + op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED); rte_compiler_barrier(); - if (tm != rte_atomic64_read(cqe + 0)) + if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED)) continue; - if (op != rte_atomic64_read(cqe + 1)) + if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED)) continue; ps = (int64_t *)ts; ps[0] = tm; @@ -690,8 +718,8 @@ ci = ci << (64 - MLX5_CQ_INDEX_WIDTH); ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH; rte_compiler_barrier(); - rte_atomic64_set(&sh->txpp.ts.ts, ts); - rte_atomic64_set(&sh->txpp.ts.ci_ts, ci); + __atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED); rte_wmb(); } @@ -713,7 +741,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return; } @@ -758,7 +787,10 @@ if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n) return; MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE); - sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts; + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts, + sh->txpp.ts.ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts, + sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE) sh->txpp.ts_p = 0; if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE) @@ -799,7 +831,8 @@ /* Check whether we have missed interrupts. */ if (cq_ci - wq->cq_ci != 1) { DRV_LOG(DEBUG, "Rearm Queue missed interrupt."); - rte_atomic32_inc(&sh->txpp.err_miss_int); + __atomic_fetch_add(&sh->txpp.err_miss_int, + 1, __ATOMIC_RELAXED); /* Check sync lost on wqe index. */ if (cq_ci - wq->cq_ci >= (((1UL << MLX5_WQ_INDEX_WIDTH) / @@ -814,7 +847,8 @@ /* Fire new requests to Rearm Queue. */ if (error) { DRV_LOG(DEBUG, "Rearm Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_rearm_queue); + __atomic_fetch_add(&sh->txpp.err_rearm_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; } } @@ -877,11 +911,11 @@ int ret; int fd; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + sh->txpp.err_miss_int = 0; + sh->txpp.err_rearm_queue = 0; + sh->txpp.err_clock_queue = 0; + sh->txpp.err_ts_past = 0; + sh->txpp.err_ts_future = 0; /* Attach interrupt handler to process Rearm Queue completions. */ fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); ret = mlx5_os_set_nonblock_channel_fd(fd); @@ -1116,7 +1150,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return -EIO; } @@ -1147,11 +1182,11 @@ int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev) struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_dev_ctx_shared *sh = priv->sh; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + __atomic_store_n(&sh->txpp.err_miss_int, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_rearm_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_clock_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_future, 0, __ATOMIC_RELAXED); return 0; } @@ -1196,19 +1231,21 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, struct mlx5_txpp_ts *tsa, uint16_t idx) { do { - int64_t ts, ci; + uint64_t ts, ci; - ts = rte_atomic64_read(&txpp->tsa[idx].ts); - ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); + ts = __atomic_load_n(&txpp->tsa[idx].ts, __ATOMIC_RELAXED); + ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) + if (__atomic_load_n(&txpp->tsa[idx].ts, + __ATOMIC_RELAXED) != ts) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) + if (__atomic_load_n(&txpp->tsa[idx].ci_ts, + __ATOMIC_RELAXED) != ci) continue; - rte_atomic64_set(&tsa->ts, ts); - rte_atomic64_set(&tsa->ci_ts, ci); + tsa->ts = ts; + tsa->ci_ts = ci; return; } while (true); } @@ -1244,9 +1281,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); if (dci < 0) dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; @@ -1284,9 +1321,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; return (dts > dci) ? dts - dci : dci - dts; @@ -1325,15 +1362,20 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, for (i = 0; i < n_txpp; ++i) stats[n_used + i].id = n_used + i; stats[n_used + 0].value = - rte_atomic32_read(&sh->txpp.err_miss_int); + __atomic_load_n(&sh->txpp.err_miss_int, + __ATOMIC_RELAXED); stats[n_used + 1].value = - rte_atomic32_read(&sh->txpp.err_rearm_queue); + __atomic_load_n(&sh->txpp.err_rearm_queue, + __ATOMIC_RELAXED); stats[n_used + 2].value = - rte_atomic32_read(&sh->txpp.err_clock_queue); + __atomic_load_n(&sh->txpp.err_clock_queue, + __ATOMIC_RELAXED); stats[n_used + 3].value = - rte_atomic32_read(&sh->txpp.err_ts_past); + __atomic_load_n(&sh->txpp.err_ts_past, + __ATOMIC_RELAXED); stats[n_used + 4].value = - rte_atomic32_read(&sh->txpp.err_ts_future); + __atomic_load_n(&sh->txpp.err_ts_future, + __ATOMIC_RELAXED); stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp); stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp); stats[n_used + 7].value = sh->txpp.sync_lost; -- 1.8.3.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* [dpdk-dev] [PATCH v3] net/mlx5: use C11 atomics in packet scheduling 2020-10-27 19:03 [dpdk-dev] [PATCH] net/mlx5: use C11 atomics in packet scheduling Viacheslav Ovsiienko 2020-10-28 10:07 ` [dpdk-dev] [PATCH v2] " Viacheslav Ovsiienko @ 2020-10-28 12:44 ` Viacheslav Ovsiienko 2020-10-28 22:00 ` Raslan Darawsheh 1 sibling, 1 reply; 4+ messages in thread From: Viacheslav Ovsiienko @ 2020-10-28 12:44 UTC (permalink / raw) To: dev; +Cc: rasland The rte_atomic API is deprecated and needs to be replaced with C11 atomic builtins. Use the relaxed ordering and explicit memory barrier for Clock Queue and timestamps synchronization. Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> --- v1: http://patches.dpdk.org/patch/82362/ v2: http://patches.dpdk.org/patch/82612/ fixed the issue of missing __atomic_compare_exchange_16, replaced with local copy of existing DPDK implementation of rte_atomic12_compare_exchange being deprecated v3: minor compilation and codestyle issues drivers/net/mlx5/mlx5.h | 14 ++--- drivers/net/mlx5/mlx5_rxtx.h | 11 ++-- drivers/net/mlx5/mlx5_txpp.c | 136 ++++++++++++++++++++++++++++--------------- 3 files changed, 102 insertions(+), 59 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 8de5842..570cc2b 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -563,8 +563,8 @@ struct mlx5_txpp_wq { /* Tx packet pacing internal timestamp. */ struct mlx5_txpp_ts { - rte_atomic64_t ci_ts; - rte_atomic64_t ts; + uint64_t ci_ts; + uint64_t ts; }; /* Tx packet pacing structure. */ @@ -587,11 +587,11 @@ struct mlx5_dev_txpp { struct mlx5_txpp_ts ts; /* Cached completion id/timestamp. */ uint32_t sync_lost:1; /* ci/timestamp synchronization lost. */ /* Statistics counters. */ - rte_atomic32_t err_miss_int; /* Missed service interrupt. */ - rte_atomic32_t err_rearm_queue; /* Rearm Queue errors. */ - rte_atomic32_t err_clock_queue; /* Clock Queue errors. */ - rte_atomic32_t err_ts_past; /* Timestamp in the past. */ - rte_atomic32_t err_ts_future; /* Timestamp in the distant future. */ + uint64_t err_miss_int; /* Missed service interrupt. */ + uint64_t err_rearm_queue; /* Rearm Queue errors. */ + uint64_t err_clock_queue; /* Clock Queue errors. */ + uint64_t err_ts_past; /* Timestamp in the past. */ + uint64_t err_ts_future; /* Timestamp in the distant future. */ }; /* Supported flex parser profile ID. */ diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 1b5fba4..84eaef7 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -14,7 +14,6 @@ #include <rte_mempool.h> #include <rte_common.h> #include <rte_hexdump.h> -#include <rte_atomic.h> #include <rte_spinlock.h> #include <rte_io.h> #include <rte_bus_pci.h> @@ -677,8 +676,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, * the service thread, data should be re-read. */ rte_compiler_barrier(); - ci = rte_atomic64_read(&sh->txpp.ts.ci_ts); - ts = rte_atomic64_read(&sh->txpp.ts.ts); + ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); + ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) break; @@ -688,7 +687,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts -= ts; if (unlikely(mts >= UINT64_MAX / 2)) { /* We have negative integer, mts is in the past. */ - rte_atomic32_inc(&sh->txpp.err_ts_past); + __atomic_fetch_add(&sh->txpp.err_ts_past, + 1, __ATOMIC_RELAXED); return -1; } tick = sh->txpp.tick; @@ -697,7 +697,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova, mts = (mts + tick - 1) / tick; if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { /* We have mts is too distant future. */ - rte_atomic32_inc(&sh->txpp.err_ts_future); + __atomic_fetch_add(&sh->txpp.err_ts_future, + 1, __ATOMIC_RELAXED); return -1; } mts <<= 64 - MLX5_CQ_INDEX_WIDTH; diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c index 37355fa..8aad92f 100644 --- a/drivers/net/mlx5/mlx5_txpp.c +++ b/drivers/net/mlx5/mlx5_txpp.c @@ -642,6 +642,32 @@ aq->arm_sn++; } +#if defined(RTE_ARCH_X86_64) +static inline int +mlx5_atomic128_compare_exchange(rte_int128_t *dst, + rte_int128_t *exp, + const rte_int128_t *src) +{ + uint8_t res; + + asm volatile (MPLOCKED + "cmpxchg16b %[dst];" + " sete %[res]" + : [dst] "=m" (dst->val[0]), + "=a" (exp->val[0]), + "=d" (exp->val[1]), + [res] "=r" (res) + : "b" (src->val[0]), + "c" (src->val[1]), + "a" (exp->val[0]), + "d" (exp->val[1]), + "m" (dst->val[0]) + : "memory"); + + return res; +} +#endif + static inline void mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts) { @@ -650,31 +676,33 @@ * update by hardware with soecified rate. We have to * read timestump and WQE completion index atomically. */ -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_X86_64) rte_int128_t src; memset(&src, 0, sizeof(src)); *ts = src; /* if (*from == *ts) *from = *src else *ts = *from; */ - rte_atomic128_cmp_exchange(from, ts, &src, 0, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + mlx5_atomic128_compare_exchange(from, ts, &src); #else - rte_atomic64_t *cqe = (rte_atomic64_t *)from; + uint64_t *cqe = (uint64_t *)from; - /* Power architecture does not support 16B compare-and-swap. */ + /* + * Power architecture does not support 16B compare-and-swap. + * ARM implements it in software, code below is more relevant. + */ for (;;) { - int64_t tm, op; - int64_t *ps; + uint64_t tm, op; + uint64_t *ps; rte_compiler_barrier(); - tm = rte_atomic64_read(cqe + 0); - op = rte_atomic64_read(cqe + 1); + tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED); + op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED); rte_compiler_barrier(); - if (tm != rte_atomic64_read(cqe + 0)) + if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED)) continue; - if (op != rte_atomic64_read(cqe + 1)) + if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED)) continue; - ps = (int64_t *)ts; + ps = (uint64_t *)ts; ps[0] = tm; ps[1] = op; return; @@ -690,8 +718,8 @@ ci = ci << (64 - MLX5_CQ_INDEX_WIDTH); ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH; rte_compiler_barrier(); - rte_atomic64_set(&sh->txpp.ts.ts, ts); - rte_atomic64_set(&sh->txpp.ts.ci_ts, ci); + __atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED); rte_wmb(); } @@ -713,7 +741,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return; } @@ -758,7 +787,10 @@ if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n) return; MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE); - sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts; + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts, + sh->txpp.ts.ts, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts, + sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE) sh->txpp.ts_p = 0; if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE) @@ -799,7 +831,8 @@ /* Check whether we have missed interrupts. */ if (cq_ci - wq->cq_ci != 1) { DRV_LOG(DEBUG, "Rearm Queue missed interrupt."); - rte_atomic32_inc(&sh->txpp.err_miss_int); + __atomic_fetch_add(&sh->txpp.err_miss_int, + 1, __ATOMIC_RELAXED); /* Check sync lost on wqe index. */ if (cq_ci - wq->cq_ci >= (((1UL << MLX5_WQ_INDEX_WIDTH) / @@ -814,7 +847,8 @@ /* Fire new requests to Rearm Queue. */ if (error) { DRV_LOG(DEBUG, "Rearm Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_rearm_queue); + __atomic_fetch_add(&sh->txpp.err_rearm_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; } } @@ -877,11 +911,11 @@ int ret; int fd; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + sh->txpp.err_miss_int = 0; + sh->txpp.err_rearm_queue = 0; + sh->txpp.err_clock_queue = 0; + sh->txpp.err_ts_past = 0; + sh->txpp.err_ts_future = 0; /* Attach interrupt handler to process Rearm Queue completions. */ fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); ret = mlx5_os_set_nonblock_channel_fd(fd); @@ -1116,7 +1150,8 @@ mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); if (to.cts.op_own >> 4) { DRV_LOG(DEBUG, "Clock Queue error sync lost."); - rte_atomic32_inc(&sh->txpp.err_clock_queue); + __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); sh->txpp.sync_lost = 1; return -EIO; } @@ -1147,11 +1182,11 @@ int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev) struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_dev_ctx_shared *sh = priv->sh; - rte_atomic32_set(&sh->txpp.err_miss_int, 0); - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); - rte_atomic32_set(&sh->txpp.err_ts_past, 0); - rte_atomic32_set(&sh->txpp.err_ts_future, 0); + __atomic_store_n(&sh->txpp.err_miss_int, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_rearm_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_clock_queue, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED); + __atomic_store_n(&sh->txpp.err_ts_future, 0, __ATOMIC_RELAXED); return 0; } @@ -1196,19 +1231,21 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, struct mlx5_txpp_ts *tsa, uint16_t idx) { do { - int64_t ts, ci; + uint64_t ts, ci; - ts = rte_atomic64_read(&txpp->tsa[idx].ts); - ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); + ts = __atomic_load_n(&txpp->tsa[idx].ts, __ATOMIC_RELAXED); + ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, __ATOMIC_RELAXED); rte_compiler_barrier(); if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) + if (__atomic_load_n(&txpp->tsa[idx].ts, + __ATOMIC_RELAXED) != ts) continue; - if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) + if (__atomic_load_n(&txpp->tsa[idx].ci_ts, + __ATOMIC_RELAXED) != ci) continue; - rte_atomic64_set(&tsa->ts, ts); - rte_atomic64_set(&tsa->ci_ts, ci); + tsa->ts = ts; + tsa->ci_ts = ci; return; } while (true); } @@ -1244,9 +1281,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); if (dci < 0) dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; @@ -1284,9 +1321,9 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, rte_compiler_barrier(); } while (ts_p != txpp->ts_p); /* We have two neighbor reports, calculate the jitter. */ - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); + dts = tsa1.ts - tsa0.ts; + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); dci += 1 << MLX5_CQ_INDEX_WIDTH; dci *= txpp->tick; return (dts > dci) ? dts - dci : dci - dts; @@ -1325,15 +1362,20 @@ int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, for (i = 0; i < n_txpp; ++i) stats[n_used + i].id = n_used + i; stats[n_used + 0].value = - rte_atomic32_read(&sh->txpp.err_miss_int); + __atomic_load_n(&sh->txpp.err_miss_int, + __ATOMIC_RELAXED); stats[n_used + 1].value = - rte_atomic32_read(&sh->txpp.err_rearm_queue); + __atomic_load_n(&sh->txpp.err_rearm_queue, + __ATOMIC_RELAXED); stats[n_used + 2].value = - rte_atomic32_read(&sh->txpp.err_clock_queue); + __atomic_load_n(&sh->txpp.err_clock_queue, + __ATOMIC_RELAXED); stats[n_used + 3].value = - rte_atomic32_read(&sh->txpp.err_ts_past); + __atomic_load_n(&sh->txpp.err_ts_past, + __ATOMIC_RELAXED); stats[n_used + 4].value = - rte_atomic32_read(&sh->txpp.err_ts_future); + __atomic_load_n(&sh->txpp.err_ts_future, + __ATOMIC_RELAXED); stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp); stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp); stats[n_used + 7].value = sh->txpp.sync_lost; -- 1.8.3.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [dpdk-dev] [PATCH v3] net/mlx5: use C11 atomics in packet scheduling 2020-10-28 12:44 ` [dpdk-dev] [PATCH v3] " Viacheslav Ovsiienko @ 2020-10-28 22:00 ` Raslan Darawsheh 0 siblings, 0 replies; 4+ messages in thread From: Raslan Darawsheh @ 2020-10-28 22:00 UTC (permalink / raw) To: Slava Ovsiienko, dev Hi, > -----Original Message----- > From: Viacheslav Ovsiienko <viacheslavo@nvidia.com> > Sent: Wednesday, October 28, 2020 2:45 PM > To: dev@dpdk.org > Cc: Raslan Darawsheh <rasland@nvidia.com> > Subject: [PATCH v3] net/mlx5: use C11 atomics in packet scheduling > > The rte_atomic API is deprecated and needs to be replaced with > C11 atomic builtins. Use the relaxed ordering and explicit > memory barrier for Clock Queue and timestamps synchronization. > > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> > --- > v1: > https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch > es.dpdk.org%2Fpatch%2F82362%2F&data=02%7C01%7Crasland%40nvid > ia.com%7C1cb693934b224c739c1208d87b3f458d%7C43083d15727340c1b7db3 > 9efd9ccc17a%7C0%7C0%7C637394858963308699&sdata=fKfJ%2FCcmrV > MIBJrWNb2YDVYUysbxbdXEGPBPB1Bi64Y%3D&reserved=0 > v2: > https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch > es.dpdk.org%2Fpatch%2F82612%2F&data=02%7C01%7Crasland%40nvid > ia.com%7C1cb693934b224c739c1208d87b3f458d%7C43083d15727340c1b7db3 > 9efd9ccc17a%7C0%7C0%7C637394858963308699&sdata=HMJWvjT%2Bc > AYrQeJ%2BrNfka9Id%2FVmIuzQX%2BF%2BHH2ndKUI%3D&reserved=0 > fixed the issue of missing __atomic_compare_exchange_16, > replaced with local copy of existing DPDK implementation > of rte_atomic12_compare_exchange being deprecated > v3: minor compilation and codestyle issues > > drivers/net/mlx5/mlx5.h | 14 ++--- > drivers/net/mlx5/mlx5_rxtx.h | 11 ++-- > drivers/net/mlx5/mlx5_txpp.c | 136 ++++++++++++++++++++++++++++---- > ----------- > 3 files changed, 102 insertions(+), 59 deletions(-) > > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h > index 8de5842..570cc2b 100644 > --- a/drivers/net/mlx5/mlx5.h > +++ b/drivers/net/mlx5/mlx5.h > @@ -563,8 +563,8 @@ struct mlx5_txpp_wq { > > /* Tx packet pacing internal timestamp. */ > struct mlx5_txpp_ts { > - rte_atomic64_t ci_ts; > - rte_atomic64_t ts; > + uint64_t ci_ts; > + uint64_t ts; > }; > > /* Tx packet pacing structure. */ > @@ -587,11 +587,11 @@ struct mlx5_dev_txpp { > struct mlx5_txpp_ts ts; /* Cached completion id/timestamp. */ > uint32_t sync_lost:1; /* ci/timestamp synchronization lost. */ > /* Statistics counters. */ > - rte_atomic32_t err_miss_int; /* Missed service interrupt. */ > - rte_atomic32_t err_rearm_queue; /* Rearm Queue errors. */ > - rte_atomic32_t err_clock_queue; /* Clock Queue errors. */ > - rte_atomic32_t err_ts_past; /* Timestamp in the past. */ > - rte_atomic32_t err_ts_future; /* Timestamp in the distant future. */ > + uint64_t err_miss_int; /* Missed service interrupt. */ > + uint64_t err_rearm_queue; /* Rearm Queue errors. */ > + uint64_t err_clock_queue; /* Clock Queue errors. */ > + uint64_t err_ts_past; /* Timestamp in the past. */ > + uint64_t err_ts_future; /* Timestamp in the distant future. */ > }; > > /* Supported flex parser profile ID. */ > diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h > index 1b5fba4..84eaef7 100644 > --- a/drivers/net/mlx5/mlx5_rxtx.h > +++ b/drivers/net/mlx5/mlx5_rxtx.h > @@ -14,7 +14,6 @@ > #include <rte_mempool.h> > #include <rte_common.h> > #include <rte_hexdump.h> > -#include <rte_atomic.h> > #include <rte_spinlock.h> > #include <rte_io.h> > #include <rte_bus_pci.h> > @@ -677,8 +676,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > * the service thread, data should be re-read. > */ > rte_compiler_barrier(); > - ci = rte_atomic64_read(&sh->txpp.ts.ci_ts); > - ts = rte_atomic64_read(&sh->txpp.ts.ts); > + ci = __atomic_load_n(&sh->txpp.ts.ci_ts, > __ATOMIC_RELAXED); > + ts = __atomic_load_n(&sh->txpp.ts.ts, > __ATOMIC_RELAXED); > rte_compiler_barrier(); > if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) > break; > @@ -688,7 +687,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > mts -= ts; > if (unlikely(mts >= UINT64_MAX / 2)) { > /* We have negative integer, mts is in the past. */ > - rte_atomic32_inc(&sh->txpp.err_ts_past); > + __atomic_fetch_add(&sh->txpp.err_ts_past, > + 1, __ATOMIC_RELAXED); > return -1; > } > tick = sh->txpp.tick; > @@ -697,7 +697,8 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > mts = (mts + tick - 1) / tick; > if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { > /* We have mts is too distant future. */ > - rte_atomic32_inc(&sh->txpp.err_ts_future); > + __atomic_fetch_add(&sh->txpp.err_ts_future, > + 1, __ATOMIC_RELAXED); > return -1; > } > mts <<= 64 - MLX5_CQ_INDEX_WIDTH; > diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c > index 37355fa..8aad92f 100644 > --- a/drivers/net/mlx5/mlx5_txpp.c > +++ b/drivers/net/mlx5/mlx5_txpp.c > @@ -642,6 +642,32 @@ > aq->arm_sn++; > } > > +#if defined(RTE_ARCH_X86_64) > +static inline int > +mlx5_atomic128_compare_exchange(rte_int128_t *dst, > + rte_int128_t *exp, > + const rte_int128_t *src) > +{ > + uint8_t res; > + > + asm volatile (MPLOCKED > + "cmpxchg16b %[dst];" > + " sete %[res]" > + : [dst] "=m" (dst->val[0]), > + "=a" (exp->val[0]), > + "=d" (exp->val[1]), > + [res] "=r" (res) > + : "b" (src->val[0]), > + "c" (src->val[1]), > + "a" (exp->val[0]), > + "d" (exp->val[1]), > + "m" (dst->val[0]) > + : "memory"); > + > + return res; > +} > +#endif > + > static inline void > mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts) > { > @@ -650,31 +676,33 @@ > * update by hardware with soecified rate. We have to > * read timestump and WQE completion index atomically. > */ > -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) > +#if defined(RTE_ARCH_X86_64) > rte_int128_t src; > > memset(&src, 0, sizeof(src)); > *ts = src; > /* if (*from == *ts) *from = *src else *ts = *from; */ > - rte_atomic128_cmp_exchange(from, ts, &src, 0, > - __ATOMIC_RELAXED, > __ATOMIC_RELAXED); > + mlx5_atomic128_compare_exchange(from, ts, &src); > #else > - rte_atomic64_t *cqe = (rte_atomic64_t *)from; > + uint64_t *cqe = (uint64_t *)from; > > - /* Power architecture does not support 16B compare-and-swap. */ > + /* > + * Power architecture does not support 16B compare-and-swap. > + * ARM implements it in software, code below is more relevant. > + */ > for (;;) { > - int64_t tm, op; > - int64_t *ps; > + uint64_t tm, op; > + uint64_t *ps; > > rte_compiler_barrier(); > - tm = rte_atomic64_read(cqe + 0); > - op = rte_atomic64_read(cqe + 1); > + tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED); > + op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED); > rte_compiler_barrier(); > - if (tm != rte_atomic64_read(cqe + 0)) > + if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED)) > continue; > - if (op != rte_atomic64_read(cqe + 1)) > + if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED)) > continue; > - ps = (int64_t *)ts; > + ps = (uint64_t *)ts; > ps[0] = tm; > ps[1] = op; > return; > @@ -690,8 +718,8 @@ > ci = ci << (64 - MLX5_CQ_INDEX_WIDTH); > ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH; > rte_compiler_barrier(); > - rte_atomic64_set(&sh->txpp.ts.ts, ts); > - rte_atomic64_set(&sh->txpp.ts.ci_ts, ci); > + __atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED); > rte_wmb(); > } > > @@ -713,7 +741,8 @@ > mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, > &to.u128); > if (to.cts.op_own >> 4) { > DRV_LOG(DEBUG, "Clock Queue error sync lost."); > - rte_atomic32_inc(&sh->txpp.err_clock_queue); > + __atomic_fetch_add(&sh->txpp.err_clock_queue, > + 1, __ATOMIC_RELAXED); > sh->txpp.sync_lost = 1; > return; > } > @@ -758,7 +787,10 @@ > if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n) > return; > MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE); > - sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts; > + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts, > + sh->txpp.ts.ts, __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts, > + sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); > if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE) > sh->txpp.ts_p = 0; > if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE) > @@ -799,7 +831,8 @@ > /* Check whether we have missed interrupts. */ > if (cq_ci - wq->cq_ci != 1) { > DRV_LOG(DEBUG, "Rearm Queue missed > interrupt."); > - rte_atomic32_inc(&sh->txpp.err_miss_int); > + __atomic_fetch_add(&sh->txpp.err_miss_int, > + 1, __ATOMIC_RELAXED); > /* Check sync lost on wqe index. */ > if (cq_ci - wq->cq_ci >= > (((1UL << MLX5_WQ_INDEX_WIDTH) / > @@ -814,7 +847,8 @@ > /* Fire new requests to Rearm Queue. */ > if (error) { > DRV_LOG(DEBUG, "Rearm Queue error sync lost."); > - rte_atomic32_inc(&sh->txpp.err_rearm_queue); > + __atomic_fetch_add(&sh->txpp.err_rearm_queue, > + 1, __ATOMIC_RELAXED); > sh->txpp.sync_lost = 1; > } > } > @@ -877,11 +911,11 @@ > int ret; > int fd; > > - rte_atomic32_set(&sh->txpp.err_miss_int, 0); > - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); > - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); > - rte_atomic32_set(&sh->txpp.err_ts_past, 0); > - rte_atomic32_set(&sh->txpp.err_ts_future, 0); > + sh->txpp.err_miss_int = 0; > + sh->txpp.err_rearm_queue = 0; > + sh->txpp.err_clock_queue = 0; > + sh->txpp.err_ts_past = 0; > + sh->txpp.err_ts_future = 0; > /* Attach interrupt handler to process Rearm Queue completions. */ > fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); > ret = mlx5_os_set_nonblock_channel_fd(fd); > @@ -1116,7 +1150,8 @@ > mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, > &to.u128); > if (to.cts.op_own >> 4) { > DRV_LOG(DEBUG, "Clock Queue error sync lost."); > - rte_atomic32_inc(&sh->txpp.err_clock_queue); > + __atomic_fetch_add(&sh->txpp.err_clock_queue, > + 1, __ATOMIC_RELAXED); > sh->txpp.sync_lost = 1; > return -EIO; > } > @@ -1147,11 +1182,11 @@ int mlx5_txpp_xstats_reset(struct rte_eth_dev > *dev) > struct mlx5_priv *priv = dev->data->dev_private; > struct mlx5_dev_ctx_shared *sh = priv->sh; > > - rte_atomic32_set(&sh->txpp.err_miss_int, 0); > - rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); > - rte_atomic32_set(&sh->txpp.err_clock_queue, 0); > - rte_atomic32_set(&sh->txpp.err_ts_past, 0); > - rte_atomic32_set(&sh->txpp.err_ts_future, 0); > + __atomic_store_n(&sh->txpp.err_miss_int, 0, > __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.err_rearm_queue, 0, > __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.err_clock_queue, 0, > __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED); > + __atomic_store_n(&sh->txpp.err_ts_future, 0, > __ATOMIC_RELAXED); > return 0; > } > > @@ -1196,19 +1231,21 @@ int mlx5_txpp_xstats_get_names(struct > rte_eth_dev *dev __rte_unused, > struct mlx5_txpp_ts *tsa, uint16_t idx) > { > do { > - int64_t ts, ci; > + uint64_t ts, ci; > > - ts = rte_atomic64_read(&txpp->tsa[idx].ts); > - ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); > + ts = __atomic_load_n(&txpp->tsa[idx].ts, > __ATOMIC_RELAXED); > + ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, > __ATOMIC_RELAXED); > rte_compiler_barrier(); > if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) > continue; > - if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) > + if (__atomic_load_n(&txpp->tsa[idx].ts, > + __ATOMIC_RELAXED) != ts) > continue; > - if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) > + if (__atomic_load_n(&txpp->tsa[idx].ci_ts, > + __ATOMIC_RELAXED) != ci) > continue; > - rte_atomic64_set(&tsa->ts, ts); > - rte_atomic64_set(&tsa->ci_ts, ci); > + tsa->ts = ts; > + tsa->ci_ts = ci; > return; > } while (true); > } > @@ -1244,9 +1281,9 @@ int mlx5_txpp_xstats_get_names(struct > rte_eth_dev *dev __rte_unused, > rte_compiler_barrier(); > } while (ts_p != txpp->ts_p); > /* We have two neighbor reports, calculate the jitter. */ > - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); > - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - > MLX5_CQ_INDEX_WIDTH)) - > - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - > MLX5_CQ_INDEX_WIDTH)); > + dts = tsa1.ts - tsa0.ts; > + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - > + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); > if (dci < 0) > dci += 1 << MLX5_CQ_INDEX_WIDTH; > dci *= txpp->tick; > @@ -1284,9 +1321,9 @@ int mlx5_txpp_xstats_get_names(struct > rte_eth_dev *dev __rte_unused, > rte_compiler_barrier(); > } while (ts_p != txpp->ts_p); > /* We have two neighbor reports, calculate the jitter. */ > - dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); > - dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - > MLX5_CQ_INDEX_WIDTH)) - > - (rte_atomic64_read(&tsa0.ci_ts) >> (64 - > MLX5_CQ_INDEX_WIDTH)); > + dts = tsa1.ts - tsa0.ts; > + dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - > + (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); > dci += 1 << MLX5_CQ_INDEX_WIDTH; > dci *= txpp->tick; > return (dts > dci) ? dts - dci : dci - dts; > @@ -1325,15 +1362,20 @@ int mlx5_txpp_xstats_get_names(struct > rte_eth_dev *dev __rte_unused, > for (i = 0; i < n_txpp; ++i) > stats[n_used + i].id = n_used + i; > stats[n_used + 0].value = > - rte_atomic32_read(&sh->txpp.err_miss_int); > + __atomic_load_n(&sh->txpp.err_miss_int, > + __ATOMIC_RELAXED); > stats[n_used + 1].value = > - rte_atomic32_read(&sh- > >txpp.err_rearm_queue); > + __atomic_load_n(&sh- > >txpp.err_rearm_queue, > + __ATOMIC_RELAXED); > stats[n_used + 2].value = > - rte_atomic32_read(&sh- > >txpp.err_clock_queue); > + __atomic_load_n(&sh- > >txpp.err_clock_queue, > + __ATOMIC_RELAXED); > stats[n_used + 3].value = > - rte_atomic32_read(&sh->txpp.err_ts_past); > + __atomic_load_n(&sh->txpp.err_ts_past, > + __ATOMIC_RELAXED); > stats[n_used + 4].value = > - rte_atomic32_read(&sh- > >txpp.err_ts_future); > + __atomic_load_n(&sh->txpp.err_ts_future, > + __ATOMIC_RELAXED); > stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh- > >txpp); > stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh- > >txpp); > stats[n_used + 7].value = sh->txpp.sync_lost; > -- > 1.8.3.1 Patch applied to next-net-mlx, Kindest regards, Raslan Darawsheh ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-10-28 22:01 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-10-27 19:03 [dpdk-dev] [PATCH] net/mlx5: use C11 atomics in packet scheduling Viacheslav Ovsiienko 2020-10-28 10:07 ` [dpdk-dev] [PATCH v2] " Viacheslav Ovsiienko 2020-10-28 12:44 ` [dpdk-dev] [PATCH v3] " Viacheslav Ovsiienko 2020-10-28 22:00 ` Raslan Darawsheh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).