* [PATCH 1/4] cnxk/net: add fc check in vector event Tx path
@ 2022-07-19 11:11 pbhagavatula
2022-07-19 11:11 ` [PATCH 2/4] event/cnxk: avoid reading non cached registers pbhagavatula
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: pbhagavatula @ 2022-07-19 11:11 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add an FC check in the vector event Tx path; the check needs to be
performed after the head wait, right before the LMTST is issued.
Since SQB pool FC updates are delayed w.r.t. the actual
utilization of the pool, add sufficient slack to avoid overflow.
Added a new device argument to override the default SQB slack
configured, can be used as follows:
-a 0002:02:00.0,sqb_slack=32
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/nics/cnxk.rst | 12 +++++++
drivers/common/cnxk/roc_nix.h | 7 ++--
drivers/common/cnxk/roc_nix_priv.h | 1 -
drivers/common/cnxk/roc_nix_queue.c | 21 +++++------
drivers/common/cnxk/roc_nix_tm.c | 2 +-
drivers/common/cnxk/roc_nix_tm_ops.c | 4 +--
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn9k_eventdev.c | 3 +-
drivers/event/cnxk/cn9k_worker.h | 4 +++
drivers/event/cnxk/cnxk_eventdev_adptr.c | 9 ++---
drivers/net/cnxk/cn10k_tx.h | 46 ++++++++++++++++++++++++
drivers/net/cnxk/cnxk_ethdev_devargs.c | 8 ++++-
12 files changed, 97 insertions(+), 23 deletions(-)
diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst
index e24eaa8bc4..eeaa3fa1cc 100644
--- a/doc/guides/nics/cnxk.rst
+++ b/doc/guides/nics/cnxk.rst
@@ -157,6 +157,18 @@ Runtime Config Options
With the above configuration, each send queue's descriptor buffer count is
limited to a maximum of 64 buffers.
+- ``SQB slack count`` (default ``12``)
+
+ Send queue descriptor slack count added to SQB count when a Tx queue is
+ created, can be set using ``sqb_slack`` ``devargs`` parameter.
+
+ For example::
+
+ -a 0002:02:00.0,sqb_slack=32
+
+ With the above configuration, each send queue's descriptor buffer count will
+ be increased by 32, while keeping the queue limit to default configuration.
+
- ``Switch header enable`` (default ``none``)
A port can be configured to a specific switch header type by using
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 4e5cf05285..3ad3a7e7c2 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -13,6 +13,8 @@
#define ROC_NIX_BPF_STATS_MAX 12
#define ROC_NIX_MTR_ID_INVALID UINT32_MAX
#define ROC_NIX_PFC_CLASS_INVALID UINT8_MAX
+#define ROC_NIX_SQB_LOWER_THRESH 70U
+#define ROC_NIX_SQB_SLACK 12U
enum roc_nix_rss_reta_sz {
ROC_NIX_RSS_RETA_SZ_64 = 64,
@@ -404,19 +406,20 @@ struct roc_nix {
bool enable_loop;
bool hw_vlan_ins;
uint8_t lock_rx_ctx;
- uint32_t outb_nb_desc;
+ uint16_t sqb_slack;
uint16_t outb_nb_crypto_qs;
+ uint32_t outb_nb_desc;
uint32_t ipsec_in_min_spi;
uint32_t ipsec_in_max_spi;
uint32_t ipsec_out_max_sa;
bool ipsec_out_sso_pffunc;
+ bool custom_sa_action;
/* End of input parameters */
/* LMT line base for "Per Core Tx LMT line" mode*/
uintptr_t lmt_base;
bool io_enabled;
bool rx_ptp_ena;
uint16_t cints;
- bool custom_sa_action;
#define ROC_NIX_MEM_SZ (6 * 1024)
uint8_t reserved[ROC_NIX_MEM_SZ] __plt_cache_aligned;
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index 5b0522c8cb..a3d4ddf5d5 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -13,7 +13,6 @@
#define NIX_DEF_SQB ((uint16_t)16)
#define NIX_MIN_SQB ((uint16_t)8)
#define NIX_SQB_LIST_SPACE ((uint16_t)2)
-#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index fa4c954631..692b13415a 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -682,12 +682,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
else
sqes_per_sqb = (blk_sz / 8) / 8;
- sq->nb_desc = PLT_MAX(256U, sq->nb_desc);
+ sq->nb_desc = PLT_MAX(512U, sq->nb_desc);
nb_sqb_bufs = sq->nb_desc / sqes_per_sqb;
nb_sqb_bufs += NIX_SQB_LIST_SPACE;
/* Clamp up the SQB count */
nb_sqb_bufs = PLT_MIN(roc_nix->max_sqb_count,
- (uint16_t)PLT_MAX(NIX_DEF_SQB, nb_sqb_bufs));
+ PLT_MAX(NIX_DEF_SQB, nb_sqb_bufs));
sq->nb_sqb_bufs = nb_sqb_bufs;
sq->sqes_per_sqb_log2 = (uint16_t)plt_log2_u32(sqes_per_sqb);
@@ -695,8 +695,9 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
nb_sqb_bufs -
(PLT_ALIGN_MUL_CEIL(nb_sqb_bufs, sqes_per_sqb) / sqes_per_sqb);
sq->nb_sqb_bufs_adj =
- (sq->nb_sqb_bufs_adj * NIX_SQB_LOWER_THRESH) / 100;
+ (sq->nb_sqb_bufs_adj * ROC_NIX_SQB_LOWER_THRESH) / 100;
+ nb_sqb_bufs += roc_nix->sqb_slack;
/* Explicitly set nat_align alone as by default pool is with both
* nat_align and buf_offset = 1 which we don't want for SQB.
*/
@@ -711,12 +712,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
aura.fc_stype = 0x3; /* STSTP */
aura.fc_addr = (uint64_t)sq->fc;
aura.fc_hyst_bits = 0; /* Store count on all updates */
- rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
+ rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
&pool);
if (rc)
goto fail;
- sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
+ sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
if (sq->sqe_mem == NULL) {
rc = NIX_ERR_NO_MEM;
goto nomem;
@@ -724,21 +725,21 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
/* Fill the initial buffers */
iova = (uint64_t)sq->sqe_mem;
- for (count = 0; count < NIX_MAX_SQB; count++) {
+ for (count = 0; count < nb_sqb_bufs; count++) {
roc_npa_aura_op_free(sq->aura_handle, 0, iova);
iova += blk_sz;
}
- if (roc_npa_aura_op_available_wait(sq->aura_handle, NIX_MAX_SQB, 0) !=
- NIX_MAX_SQB) {
+ if (roc_npa_aura_op_available_wait(sq->aura_handle, nb_sqb_bufs, 0) !=
+ nb_sqb_bufs) {
plt_err("Failed to free all pointers to the pool");
rc = NIX_ERR_NO_MEM;
goto npa_fail;
}
roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
- roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs);
- sq->aura_sqb_bufs = NIX_MAX_SQB;
+ roc_npa_aura_limit_modify(sq->aura_handle, nb_sqb_bufs);
+ sq->aura_sqb_bufs = nb_sqb_bufs;
return rc;
npa_fail:
diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c
index a31abded1a..81d491a3fd 100644
--- a/drivers/common/cnxk/roc_nix_tm.c
+++ b/drivers/common/cnxk/roc_nix_tm.c
@@ -594,7 +594,7 @@ roc_nix_tm_sq_flush_spin(struct roc_nix_sq *sq)
/* SQ reached quiescent state */
if (sqb_cnt <= 1 && head_off == tail_off &&
- (*(volatile uint64_t *)sq->fc == sq->nb_sqb_bufs)) {
+ (*(volatile uint64_t *)sq->fc == sq->aura_sqb_bufs)) {
break;
}
diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c
index 4aa55002fe..7036495ad8 100644
--- a/drivers/common/cnxk/roc_nix_tm_ops.c
+++ b/drivers/common/cnxk/roc_nix_tm_ops.c
@@ -67,7 +67,7 @@ roc_nix_tm_sq_aura_fc(struct roc_nix_sq *sq, bool enable)
if (enable)
*(volatile uint64_t *)sq->fc = rsp->aura.count;
else
- *(volatile uint64_t *)sq->fc = sq->nb_sqb_bufs;
+ *(volatile uint64_t *)sq->fc = sq->aura_sqb_bufs;
/* Sync write barrier */
plt_wmb();
return 0;
@@ -535,7 +535,7 @@ roc_nix_tm_hierarchy_disable(struct roc_nix *roc_nix)
tail_off = (val >> 28) & 0x3F;
if (sqb_cnt > 1 || head_off != tail_off ||
- (*(uint64_t *)sq->fc != sq->nb_sqb_bufs))
+ (*(uint64_t *)sq->fc != sq->aura_sqb_bufs))
plt_err("Failed to gracefully flush sq %u", sq->qid);
}
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 5a0cab40a9..4a2554a8eb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -815,7 +815,8 @@ cn10k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc /
(sqes_per_sqb - 1));
txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
- txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ txq->nb_sqb_bufs_adj =
+ (ROC_NIX_SQB_LOWER_THRESH * txq->nb_sqb_bufs_adj) / 100;
}
}
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 2e27030049..ad50570b38 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -1046,7 +1046,8 @@ cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc /
(sqes_per_sqb - 1));
txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
- txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ txq->nb_sqb_bufs_adj =
+ (ROC_NIX_SQB_LOWER_THRESH * txq->nb_sqb_bufs_adj) / 100;
}
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 64e97e321a..5782f3ed8f 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -749,6 +749,10 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
!(flags & NIX_TX_OFFLOAD_SECURITY_F))
rte_io_wmb();
txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ if (((txq->nb_sqb_bufs_adj -
+ __atomic_load_n((int16_t *)txq->fc_mem, __ATOMIC_RELAXED))
+ << txq->sqes_per_sqb_log2) <= 0)
+ return 0;
cn9k_nix_tx_skeleton(txq, cmd, flags, 0);
cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,
txq->mark_fmt);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 1f2e1b4b5d..b1bc25883e 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -342,14 +342,15 @@ cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
{
int rc;
- if (sq->nb_sqb_bufs != nb_sqb_bufs) {
+ if (sq->aura_sqb_bufs != nb_sqb_bufs) {
rc = roc_npa_aura_limit_modify(
sq->aura_handle,
RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
if (rc < 0)
return rc;
- sq->nb_sqb_bufs = RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs);
+ sq->nb_sqb_bufs = RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs) -
+ sq->roc_nix->sqb_slack;
}
return 0;
}
@@ -547,7 +548,7 @@ cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
} else {
txq = eth_dev->data->tx_queues[tx_queue_id];
sq = &cnxk_eth_dev->sqs[tx_queue_id];
- cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->aura_sqb_bufs);
ret = cnxk_sso_updt_tx_queue_data(
event_dev, eth_dev->data->port_id, tx_queue_id, txq);
if (ret < 0)
@@ -579,7 +580,7 @@ cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
i);
} else {
sq = &cnxk_eth_dev->sqs[tx_queue_id];
- cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->aura_sqb_bufs);
ret = cnxk_sso_updt_tx_queue_data(
event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
if (ret < 0)
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index ea13866b20..8056510589 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -54,6 +54,31 @@
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
+static __plt_always_inline void
+cn10k_nix_vwqe_wait_fc(struct cn10k_eth_txq *txq, int64_t req)
+{
+ int64_t cached, refill;
+
+retry:
+ while (__atomic_load_n(&txq->fc_cache_pkts, __ATOMIC_RELAXED) < 0)
+ ;
+ cached = __atomic_sub_fetch(&txq->fc_cache_pkts, req, __ATOMIC_ACQUIRE);
+ /* Check if there is enough space, else update and retry. */
+ if (cached < 0) {
+ /* Check if we have space else retry. */
+ do {
+ refill =
+ (txq->nb_sqb_bufs_adj -
+ __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED))
+ << txq->sqes_per_sqb_log2;
+ } while (refill <= 0);
+ __atomic_compare_exchange(&txq->fc_cache_pkts, &cached, &refill,
+ 0, __ATOMIC_RELEASE,
+ __ATOMIC_RELAXED);
+ goto retry;
+ }
+}
+
/* Function to determine no of tx subdesc required in case ext
* sub desc is enabled.
*/
@@ -1039,6 +1064,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
data |= (15ULL << 12);
data |= (uint64_t)lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, 16);
/* STEOR0 */
roc_lmt_submit_steorl(data, pa);
@@ -1048,6 +1075,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
data |= ((uint64_t)(burst - 17)) << 12;
data |= (uint64_t)(lmt_id + 16);
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, burst - 16);
/* STEOR1 */
roc_lmt_submit_steorl(data, pa);
} else if (burst) {
@@ -1057,6 +1086,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
data |= ((uint64_t)(burst - 1)) << 12;
data |= lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, burst);
/* STEOR0 */
roc_lmt_submit_steorl(data, pa);
}
@@ -1188,6 +1219,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
data0 |= (15ULL << 12);
data0 |= (uint64_t)lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, 16);
/* STEOR0 */
roc_lmt_submit_steorl(data0, pa0);
@@ -1197,6 +1230,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
data1 |= ((uint64_t)(burst - 17)) << 12;
data1 |= (uint64_t)(lmt_id + 16);
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, burst - 16);
/* STEOR1 */
roc_lmt_submit_steorl(data1, pa1);
} else if (burst) {
@@ -1207,6 +1242,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
data0 |= ((burst - 1) << 12);
data0 |= (uint64_t)lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, burst);
/* STEOR0 */
roc_lmt_submit_steorl(data0, pa0);
}
@@ -2735,6 +2772,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
wd.data[0] |= (15ULL << 12);
wd.data[0] |= (uint64_t)lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq,
+ cn10k_nix_pkts_per_vec_brst(flags) >> 1);
/* STEOR0 */
roc_lmt_submit_steorl(wd.data[0], pa);
@@ -2750,6 +2790,10 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
wd.data[1] |= (uint64_t)(lmt_id + 16);
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq,
+ burst - (cn10k_nix_pkts_per_vec_brst(flags) >>
+ 1));
/* STEOR1 */
roc_lmt_submit_steorl(wd.data[1], pa);
} else if (lnum) {
@@ -2765,6 +2809,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
wd.data[0] |= lmt_id;
+ if (flags & NIX_TX_VWQE_F)
+ cn10k_nix_vwqe_wait_fc(txq, burst);
/* STEOR0 */
roc_lmt_submit_steorl(wd.data[0], pa);
}
diff --git a/drivers/net/cnxk/cnxk_ethdev_devargs.c b/drivers/net/cnxk/cnxk_ethdev_devargs.c
index 248582e1f6..4ded850622 100644
--- a/drivers/net/cnxk/cnxk_ethdev_devargs.c
+++ b/drivers/net/cnxk/cnxk_ethdev_devargs.c
@@ -246,6 +246,7 @@ parse_sdp_channel_mask(const char *key, const char *value, void *extra_args)
#define CNXK_SDP_CHANNEL_MASK "sdp_channel_mask"
#define CNXK_FLOW_PRE_L2_INFO "flow_pre_l2_info"
#define CNXK_CUSTOM_SA_ACT "custom_sa_act"
+#define CNXK_SQB_SLACK "sqb_slack"
int
cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
@@ -254,6 +255,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
uint16_t sqb_count = CNXK_NIX_TX_MAX_SQB;
struct flow_pre_l2_size_info pre_l2_info;
uint32_t ipsec_in_max_spi = BIT(8) - 1;
+ uint16_t sqb_slack = ROC_NIX_SQB_SLACK;
uint32_t ipsec_out_max_sa = BIT(12);
uint16_t flow_prealloc_size = 1;
uint16_t switch_header_type = 0;
@@ -311,6 +313,8 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
&parse_pre_l2_hdr_info, &pre_l2_info);
rte_kvargs_process(kvlist, CNXK_CUSTOM_SA_ACT, &parse_flag,
&custom_sa_act);
+ rte_kvargs_process(kvlist, CNXK_SQB_SLACK, &parse_sqb_count,
+ &sqb_slack);
rte_kvargs_free(kvlist);
null_devargs:
@@ -328,6 +332,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
dev->nix.reta_sz = reta_sz;
dev->nix.lock_rx_ctx = lock_rx_ctx;
dev->nix.custom_sa_action = custom_sa_act;
+ dev->nix.sqb_slack = sqb_slack;
dev->npc.flow_prealloc_size = flow_prealloc_size;
dev->npc.flow_max_priority = flow_max_priority;
dev->npc.switch_header_type = switch_header_type;
@@ -356,4 +361,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_cnxk,
CNXK_OUTB_NB_CRYPTO_QS "=<1-64>"
CNXK_NO_INL_DEV "=0"
CNXK_SDP_CHANNEL_MASK "=<1-4095>/<1-4095>"
- CNXK_CUSTOM_SA_ACT "=1");
+ CNXK_CUSTOM_SA_ACT "=1"
+ CNXK_SQB_SLACK "=<12-512>");
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/4] event/cnxk: avoid reading non cached registers
2022-07-19 11:11 [PATCH 1/4] cnxk/net: add fc check in vector event Tx path pbhagavatula
@ 2022-07-19 11:11 ` pbhagavatula
2022-09-21 6:15 ` [PATCH v2 1/3] " pbhagavatula
2022-07-19 11:11 ` [PATCH 3/4] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
2022-07-19 11:11 ` [PATCH 4/4] event/cnxk: disable timer resolution estimation pbhagavatula
2 siblings, 1 reply; 8+ messages in thread
From: pbhagavatula @ 2022-07-19 11:11 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Avoid reading non-cached registers in the fastpath.
PENDSTATE need not be read before the tag flush in the Tx enqueue
context, as earlier checks already account for pending flushes.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn9k_worker.h | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 5782f3ed8f..653c51f616 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -156,6 +156,15 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
}
}
+static __rte_always_inline void
+cn9k_sso_tx_tag_flush(uint64_t base)
+{
+ if (unlikely(CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_TAG)) ==
+ SSO_TT_EMPTY))
+ return;
+ plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+}
+
static __rte_always_inline void
cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
const uint32_t tag, const uint32_t flags,
@@ -811,7 +820,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
return 1;
}
- cnxk_sso_hws_swtag_flush(base);
+ cn9k_sso_tx_tag_flush(base);
return 1;
}
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/4] event/cnxk: set dequeue mode to prefetch with wait
2022-07-19 11:11 [PATCH 1/4] cnxk/net: add fc check in vector event Tx path pbhagavatula
2022-07-19 11:11 ` [PATCH 2/4] event/cnxk: avoid reading non cached registers pbhagavatula
@ 2022-07-19 11:11 ` pbhagavatula
2022-07-19 11:11 ` [PATCH 4/4] event/cnxk: disable timer resolution estimation pbhagavatula
2 siblings, 0 replies; 8+ messages in thread
From: pbhagavatula @ 2022-07-19 11:11 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Set the default GetWork (dequeue) mode to prefetch with wait for
an event.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 5 +++--
drivers/event/cnxk/cn10k_eventdev.c | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 8537f6257e..69eabf7f22 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -79,8 +79,9 @@ Runtime Config Options
- ``CN10K Getwork mode``
- CN10K supports multiple getwork prefetch modes, by default the prefetch
- mode is set to none.
+ CN10K supports three getwork prefetch modes no prefetch[0], prefetch
+ immediately[1] and delayed prefetch on forward progress event[2].
+ The default getwork mode is 2.
For example::
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2554a8eb..6dea550b35 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -981,6 +981,7 @@ cn10k_sso_init(struct rte_eventdev *event_dev)
return 0;
}
+ dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
rc = cnxk_sso_init(event_dev);
if (rc < 0)
return rc;
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 4/4] event/cnxk: disable timer resolution estimation
2022-07-19 11:11 [PATCH 1/4] cnxk/net: add fc check in vector event Tx path pbhagavatula
2022-07-19 11:11 ` [PATCH 2/4] event/cnxk: avoid reading non cached registers pbhagavatula
2022-07-19 11:11 ` [PATCH 3/4] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
@ 2022-07-19 11:11 ` pbhagavatula
2 siblings, 0 replies; 8+ messages in thread
From: pbhagavatula @ 2022-07-19 11:11 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Disable timer resolution estimation; instead, read the TIM LF clock
registers to get the current running clock counter, as estimation
causes time drift.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/common/cnxk/hw/tim.h | 6 ++
drivers/event/cnxk/cnxk_tim_evdev.c | 140 +++------------------------
drivers/event/cnxk/cnxk_tim_evdev.h | 49 +++++++---
drivers/event/cnxk/cnxk_tim_worker.c | 18 ----
drivers/event/cnxk/cnxk_tim_worker.h | 3 +-
5 files changed, 62 insertions(+), 154 deletions(-)
diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index a0fe29ddcf..61c38ae175 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -31,6 +31,12 @@
#define TIM_LF_INT_VEC_NRSPERR_INT (0x0ull)
#define TIM_LF_INT_VEC_RAS_INT (0x1ull)
#define TIM_LF_RING_AURA (0x0)
+#define TIM_LF_FR_RN_GPIOS (0x020)
+#define TIM_LF_FR_RN_GTI (0x030)
+#define TIM_LF_FR_RN_PTP (0x040)
+#define TIM_LF_FR_RN_TENNS (0x050)
+#define TIM_LF_FR_RN_SYNCE (0x060)
+#define TIM_LF_FR_RN_BTS (0x070)
#define TIM_LF_RING_BASE (0x130)
#define TIM_LF_NRSPERR_INT (0x200)
#define TIM_LF_NRSPERR_INT_W1S (0x208)
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index af67235503..f8a536e71a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -119,80 +119,6 @@ cnxk_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
sizeof(struct rte_event_timer_adapter_conf));
}
-static inline void
-sort_multi_array(double ref_arr[], uint64_t arr1[], uint64_t arr2[],
- uint64_t arr3[], uint8_t sz)
-{
- int x;
-
- for (x = 0; x < sz - 1; x++) {
- if (ref_arr[x] > ref_arr[x + 1]) {
- PLT_SWAP(ref_arr[x], ref_arr[x + 1]);
- PLT_SWAP(arr1[x], arr1[x + 1]);
- PLT_SWAP(arr2[x], arr2[x + 1]);
- PLT_SWAP(arr3[x], arr3[x + 1]);
- x = -1;
- }
- }
-}
-
-static inline void
-populate_sample(uint64_t tck[], uint64_t ns[], double diff[], uint64_t dst[],
- uint64_t req_tck, uint64_t clk_freq, double tck_ns, uint8_t sz,
- bool mov_fwd)
-{
- int i;
-
- for (i = 0; i < sz; i++) {
- tck[i] = i ? tck[i - 1] : req_tck;
- do {
- mov_fwd ? tck[i]++ : tck[i]--;
- ns[i] = round((double)tck[i] * tck_ns);
- if (round((double)tck[i] * tck_ns) >
- ((double)tck[i] * tck_ns))
- continue;
- } while (ns[i] % (uint64_t)cnxk_tim_ns_per_tck(clk_freq));
- diff[i] = PLT_MAX((double)ns[i], (double)tck[i] * tck_ns) -
- PLT_MIN((double)ns[i], (double)tck[i] * tck_ns);
- dst[i] = mov_fwd ? tck[i] - req_tck : req_tck - tck[i];
- }
-}
-
-static void
-tim_adjust_resolution(uint64_t *req_ns, uint64_t *req_tck, double tck_ns,
- uint64_t clk_freq, uint64_t max_tmo, uint64_t m_tck)
-{
-#define MAX_SAMPLES 5
- double rmax_diff[MAX_SAMPLES], rmin_diff[MAX_SAMPLES];
- uint64_t min_tck[MAX_SAMPLES], max_tck[MAX_SAMPLES];
- uint64_t min_dst[MAX_SAMPLES], max_dst[MAX_SAMPLES];
- uint64_t min_ns[MAX_SAMPLES], max_ns[MAX_SAMPLES];
- int i;
-
- populate_sample(max_tck, max_ns, rmax_diff, max_dst, *req_tck, clk_freq,
- tck_ns, MAX_SAMPLES, true);
- sort_multi_array(rmax_diff, max_dst, max_tck, max_ns, MAX_SAMPLES);
-
- populate_sample(min_tck, min_ns, rmin_diff, min_dst, *req_tck, clk_freq,
- tck_ns, MAX_SAMPLES, false);
- sort_multi_array(rmin_diff, min_dst, min_tck, min_ns, MAX_SAMPLES);
-
- for (i = 0; i < MAX_SAMPLES; i++) {
- if (min_dst[i] < max_dst[i] && min_tck[i] > m_tck &&
- (max_tmo / min_ns[i]) <=
- (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
- *req_tck = min_tck[i];
- *req_ns = min_ns[i];
- break;
- } else if ((max_tmo / max_ns[i]) <
- (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
- *req_tck = max_tck[i];
- *req_ns = max_ns[i];
- break;
- }
- }
-}
-
static int
cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
{
@@ -263,27 +189,7 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
goto tim_hw_free;
}
- tim_ring->tck_nsec =
- round(RTE_ALIGN_MUL_NEAR((long double)rcfg->timer_tick_ns,
- cnxk_tim_ns_per_tck(clk_freq)));
- if (log10(clk_freq) - floor(log10(clk_freq)) != 0.0) {
- uint64_t req_ns, req_tck;
- double tck_ns;
-
- req_ns = tim_ring->tck_nsec;
- tck_ns = NSECPERSEC / clk_freq;
- req_tck = round(rcfg->timer_tick_ns / tck_ns);
- tim_adjust_resolution(&req_ns, &req_tck, tck_ns, clk_freq,
- rcfg->max_tmo_ns, min_intvl_cyc);
- if ((tim_ring->tck_nsec != req_ns) &&
- !(rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) {
- rc = -ERANGE;
- goto tim_hw_free;
- }
- tim_ring->tck_nsec = ceil(req_tck * tck_ns);
- }
-
- tim_ring->tck_int = round((long double)tim_ring->tck_nsec /
+ tim_ring->tck_int = round((double)rcfg->timer_tick_ns /
cnxk_tim_ns_per_tck(clk_freq));
tim_ring->tck_nsec =
ceil(tim_ring->tck_int * cnxk_tim_ns_per_tck(clk_freq));
@@ -296,6 +202,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
tim_ring->chunk_sz = dev->chunk_sz;
tim_ring->disable_npa = dev->disable_npa;
tim_ring->enable_stats = dev->enable_stats;
+ tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
+ tim_ring->tbase = cnxk_tim_get_tick_base(clk_src, tim_ring->base);
+
+ if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI))
+ tim_ring->tick_fn = cnxk_tim_cntvct;
+ else
+ tim_ring->tick_fn = cnxk_tim_tick_read;
for (i = 0; i < dev->ring_ctl_cnt; i++) {
struct cnxk_tim_ctl *ring_ctl = &dev->ring_ctl_data[i];
@@ -342,7 +255,6 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
goto tim_chnk_free;
}
- tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
@@ -391,31 +303,6 @@ cnxk_tim_ring_free(struct rte_event_timer_adapter *adptr)
return 0;
}
-static void
-cnxk_tim_calibrate_start_tsc(struct cnxk_tim_ring *tim_ring)
-{
-#define CNXK_TIM_CALIB_ITER 1E6
- uint32_t real_bkt, bucket;
- int icount, ecount = 0;
- uint64_t bkt_cyc;
-
- for (icount = 0; icount < CNXK_TIM_CALIB_ITER; icount++) {
- real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
- bkt_cyc = cnxk_tim_cntvct();
- bucket = (bkt_cyc - tim_ring->ring_start_cyc) /
- tim_ring->tck_int;
- bucket = bucket % (tim_ring->nb_bkts);
- tim_ring->ring_start_cyc =
- bkt_cyc - (real_bkt * tim_ring->tck_int);
- if (bucket != real_bkt)
- ecount++;
- }
- tim_ring->last_updt_cyc = bkt_cyc;
- plt_tim_dbg("Bucket mispredict %3.2f distance %d\n",
- 100 - (((double)(icount - ecount) / (double)icount) * 100),
- bucket - real_bkt);
-}
-
static int
cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
{
@@ -431,12 +318,16 @@ cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
if (rc < 0)
return rc;
- tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts;
tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
tim_ring->fast_bkt = rte_reciprocal_value_u64(tim_ring->nb_bkts);
- cnxk_tim_calibrate_start_tsc(tim_ring);
+ if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI)) {
+ uint64_t start_diff;
+ start_diff = cnxk_tim_cntvct(tim_ring->tbase) -
+ cnxk_tim_tick_read(tim_ring->tbase);
+ tim_ring->ring_start_cyc += start_diff;
+ }
return rc;
}
@@ -462,7 +353,8 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
struct rte_event_timer_adapter_stats *stats)
{
struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;
- uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
+ uint64_t bkt_cyc =
+ tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
stats->evtim_exp_count =
__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 91a90ee2ce..0fda9f4f13 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -24,14 +24,14 @@
#define CNXK_TIM_EVDEV_NAME cnxk_tim_eventdev
#define CNXK_TIM_MAX_BUCKETS (0xFFFFF)
-#define CNXK_TIM_RING_DEF_CHUNK_SZ (4096)
+#define CNXK_TIM_RING_DEF_CHUNK_SZ (256)
#define CNXK_TIM_CHUNK_ALIGNMENT (16)
#define CNXK_TIM_MAX_BURST \
(RTE_CACHE_LINE_SIZE / CNXK_TIM_CHUNK_ALIGNMENT)
#define CNXK_TIM_NB_CHUNK_SLOTS(sz) (((sz) / CNXK_TIM_CHUNK_ALIGNMENT) - 1)
#define CNXK_TIM_MIN_CHUNK_SLOTS (0x1)
#define CNXK_TIM_MAX_CHUNK_SLOTS (0x1FFE)
-#define CNXK_TIM_MAX_POOL_CACHE_SZ (128)
+#define CNXK_TIM_MAX_POOL_CACHE_SZ (16)
#define CN9K_TIM_MIN_TMO_TKS (256)
@@ -119,18 +119,18 @@ struct cnxk_tim_bkt {
};
struct cnxk_tim_ring {
- uintptr_t base;
uint16_t nb_chunk_slots;
uint32_t nb_bkts;
- uint64_t last_updt_cyc;
+ uintptr_t tbase;
+ uint64_t (*tick_fn)(uint64_t tbase);
uint64_t ring_start_cyc;
- uint64_t tck_int;
- uint64_t tot_int;
struct cnxk_tim_bkt *bkt;
struct rte_mempool *chunk_pool;
struct rte_reciprocal_u64 fast_div;
struct rte_reciprocal_u64 fast_bkt;
+ uint64_t tck_int;
uint64_t arm_cnt;
+ uintptr_t base;
uint8_t prod_type_sp;
uint8_t enable_stats;
uint8_t disable_npa;
@@ -163,19 +163,19 @@ cnxk_tim_priv_get(void)
return mz->addr;
}
-static inline long double
+static inline double
cnxk_tim_ns_per_tck(uint64_t freq)
{
- return (long double)NSECPERSEC / freq;
+ return (double)NSECPERSEC / freq;
}
#ifdef RTE_ARCH_ARM64
static inline uint64_t
-cnxk_tim_cntvct(void)
+cnxk_tim_cntvct(uint64_t base __rte_unused)
{
uint64_t tsc;
- asm volatile("mrs %0, cntvct_el0" : "=r"(tsc));
+ asm volatile("mrs %0, CNTVCT_EL0" : "=r"(tsc)::"memory");
return tsc;
}
@@ -189,7 +189,7 @@ cnxk_tim_cntfrq(void)
}
#else
static inline uint64_t
-cnxk_tim_cntvct(void)
+cnxk_tim_cntvct(uint64_t base __rte_unused)
{
return 0;
}
@@ -201,6 +201,12 @@ cnxk_tim_cntfrq(void)
}
#endif
+static inline uint64_t
+cnxk_tim_tick_read(uint64_t tick_base)
+{
+ return plt_read64(tick_base);
+}
+
static inline enum roc_tim_clk_src
cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
{
@@ -221,6 +227,27 @@ cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
}
}
+static inline uintptr_t
+cnxk_tim_get_tick_base(enum roc_tim_clk_src clk_src, uintptr_t base)
+{
+ switch (clk_src) {
+ case ROC_TIM_CLK_SRC_GTI:
+ return base + TIM_LF_FR_RN_GTI;
+ case ROC_TIM_CLK_SRC_GPIO:
+ return base + TIM_LF_FR_RN_GPIOS;
+ case ROC_TIM_CLK_SRC_10NS:
+ return base + TIM_LF_FR_RN_TENNS;
+ case ROC_TIM_CLK_SRC_PTP:
+ return base + TIM_LF_FR_RN_PTP;
+ case ROC_TIM_CLK_SRC_SYNCE:
+ return base + TIM_LF_FR_RN_SYNCE;
+ case ROC_TIM_CLK_SRC_BTS:
+ return base + TIM_LF_FR_RN_BTS;
+ default:
+ return ROC_TIM_CLK_SRC_INVALID;
+ }
+}
+
static inline int
cnxk_tim_get_clk_freq(struct cnxk_tim_evdev *dev, enum roc_tim_clk_src clk_src,
uint64_t *freq)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index dfcfbdc797..923a72093b 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -39,22 +39,6 @@ cnxk_tim_format_event(const struct rte_event_timer *const tim,
entry->wqe = tim->ev.u64;
}
-static inline void
-cnxk_tim_sync_start_cyc(struct cnxk_tim_ring *tim_ring)
-{
- uint64_t cur_cyc = cnxk_tim_cntvct();
- uint32_t real_bkt;
-
- if (cur_cyc - tim_ring->last_updt_cyc > tim_ring->tot_int) {
- real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
- cur_cyc = cnxk_tim_cntvct();
-
- tim_ring->ring_start_cyc =
- cur_cyc - (real_bkt * tim_ring->tck_int);
- tim_ring->last_updt_cyc = cur_cyc;
- }
-}
-
static __rte_always_inline uint16_t
cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -65,7 +49,6 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
uint16_t index;
int ret = 0;
- cnxk_tim_sync_start_cyc(tim_ring);
for (index = 0; index < nb_timers; index++) {
if (cnxk_tim_arm_checks(tim_ring, tim[index]))
break;
@@ -127,7 +110,6 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
return 0;
}
- cnxk_tim_sync_start_cyc(tim_ring);
while (arr_idx < nb_timers) {
for (idx = 0; idx < CNXK_TIM_MAX_BURST && (arr_idx < nb_timers);
idx++, arr_idx++) {
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index 0c9f29cfbe..8d8ed1d3a1 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -131,7 +131,8 @@ cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
struct cnxk_tim_bkt **mirr_bkt)
{
- const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
+ const uint64_t bkt_cyc =
+ tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
uint64_t bucket =
rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
rel_bkt;
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/3] event/cnxk: avoid reading non cached registers
2022-07-19 11:11 ` [PATCH 2/4] event/cnxk: avoid reading non cached registers pbhagavatula
@ 2022-09-21 6:15 ` pbhagavatula
2022-09-21 6:15 ` [PATCH v2 2/3] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
2022-09-21 6:15 ` [PATCH v2 3/3] event/cnxk: disable timer resolution estimation pbhagavatula
0 siblings, 2 replies; 8+ messages in thread
From: pbhagavatula @ 2022-09-21 6:15 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Avoid reading non-cached registers in fastpath.
PENDSTATE need not be read before the tag flush in the Tx
enqueue context, as additional checks for pending flushes
are performed beforehand.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: Series-24634
v2 Changes:
- Rebase on next-net-mrvl
drivers/event/cnxk/cn9k_worker.h | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 384b428ed1..881861f348 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -156,6 +156,15 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
}
}
+static __rte_always_inline void
+cn9k_sso_tx_tag_flush(uint64_t base)
+{
+ if (unlikely(CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_TAG)) ==
+ SSO_TT_EMPTY))
+ return;
+ plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+}
+
static __rte_always_inline void
cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
const uint32_t tag, const uint32_t flags,
@@ -835,7 +844,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
return 1;
}
- cnxk_sso_hws_swtag_flush(base);
+ cn9k_sso_tx_tag_flush(base);
return 1;
}
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 2/3] event/cnxk: set dequeue mode to prefetch with wait
2022-09-21 6:15 ` [PATCH v2 1/3] " pbhagavatula
@ 2022-09-21 6:15 ` pbhagavatula
2022-09-21 6:15 ` [PATCH v2 3/3] event/cnxk: disable timer resolution estimation pbhagavatula
1 sibling, 0 replies; 8+ messages in thread
From: pbhagavatula @ 2022-09-21 6:15 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Set the default GetWork (dequeue) mode to prefetch with wait
for an event.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 5 +++--
drivers/event/cnxk/cn10k_eventdev.c | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 8537f6257e..69eabf7f22 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -79,8 +79,9 @@ Runtime Config Options
- ``CN10K Getwork mode``
- CN10K supports multiple getwork prefetch modes, by default the prefetch
- mode is set to none.
+ CN10K supports three getwork prefetch modes: no prefetch[0], prefetch
+ immediately[1] and delayed prefetch on forward progress event[2].
+ The default getwork mode is 2.
For example::
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 1774455b4c..0651b2d15a 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -982,6 +982,7 @@ cn10k_sso_init(struct rte_eventdev *event_dev)
return 0;
}
+ dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
rc = cnxk_sso_init(event_dev);
if (rc < 0)
return rc;
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 3/3] event/cnxk: disable timer resolution estimation
2022-09-21 6:15 ` [PATCH v2 1/3] " pbhagavatula
2022-09-21 6:15 ` [PATCH v2 2/3] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
@ 2022-09-21 6:15 ` pbhagavatula
2022-09-27 11:01 ` Jerin Jacob
1 sibling, 1 reply; 8+ messages in thread
From: pbhagavatula @ 2022-09-21 6:15 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Disable timer resolution estimation; instead, read the TIM LF
clock registers to get the current running clock counter, as
estimation causes time drift.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/common/cnxk/hw/tim.h | 6 ++
drivers/event/cnxk/cnxk_tim_evdev.c | 140 +++------------------------
drivers/event/cnxk/cnxk_tim_evdev.h | 49 +++++++---
drivers/event/cnxk/cnxk_tim_worker.c | 18 ----
drivers/event/cnxk/cnxk_tim_worker.h | 3 +-
5 files changed, 62 insertions(+), 154 deletions(-)
diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index a0fe29ddcf..61c38ae175 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -31,6 +31,12 @@
#define TIM_LF_INT_VEC_NRSPERR_INT (0x0ull)
#define TIM_LF_INT_VEC_RAS_INT (0x1ull)
#define TIM_LF_RING_AURA (0x0)
+#define TIM_LF_FR_RN_GPIOS (0x020)
+#define TIM_LF_FR_RN_GTI (0x030)
+#define TIM_LF_FR_RN_PTP (0x040)
+#define TIM_LF_FR_RN_TENNS (0x050)
+#define TIM_LF_FR_RN_SYNCE (0x060)
+#define TIM_LF_FR_RN_BTS (0x070)
#define TIM_LF_RING_BASE (0x130)
#define TIM_LF_NRSPERR_INT (0x200)
#define TIM_LF_NRSPERR_INT_W1S (0x208)
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index af67235503..f8a536e71a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -119,80 +119,6 @@ cnxk_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
sizeof(struct rte_event_timer_adapter_conf));
}
-static inline void
-sort_multi_array(double ref_arr[], uint64_t arr1[], uint64_t arr2[],
- uint64_t arr3[], uint8_t sz)
-{
- int x;
-
- for (x = 0; x < sz - 1; x++) {
- if (ref_arr[x] > ref_arr[x + 1]) {
- PLT_SWAP(ref_arr[x], ref_arr[x + 1]);
- PLT_SWAP(arr1[x], arr1[x + 1]);
- PLT_SWAP(arr2[x], arr2[x + 1]);
- PLT_SWAP(arr3[x], arr3[x + 1]);
- x = -1;
- }
- }
-}
-
-static inline void
-populate_sample(uint64_t tck[], uint64_t ns[], double diff[], uint64_t dst[],
- uint64_t req_tck, uint64_t clk_freq, double tck_ns, uint8_t sz,
- bool mov_fwd)
-{
- int i;
-
- for (i = 0; i < sz; i++) {
- tck[i] = i ? tck[i - 1] : req_tck;
- do {
- mov_fwd ? tck[i]++ : tck[i]--;
- ns[i] = round((double)tck[i] * tck_ns);
- if (round((double)tck[i] * tck_ns) >
- ((double)tck[i] * tck_ns))
- continue;
- } while (ns[i] % (uint64_t)cnxk_tim_ns_per_tck(clk_freq));
- diff[i] = PLT_MAX((double)ns[i], (double)tck[i] * tck_ns) -
- PLT_MIN((double)ns[i], (double)tck[i] * tck_ns);
- dst[i] = mov_fwd ? tck[i] - req_tck : req_tck - tck[i];
- }
-}
-
-static void
-tim_adjust_resolution(uint64_t *req_ns, uint64_t *req_tck, double tck_ns,
- uint64_t clk_freq, uint64_t max_tmo, uint64_t m_tck)
-{
-#define MAX_SAMPLES 5
- double rmax_diff[MAX_SAMPLES], rmin_diff[MAX_SAMPLES];
- uint64_t min_tck[MAX_SAMPLES], max_tck[MAX_SAMPLES];
- uint64_t min_dst[MAX_SAMPLES], max_dst[MAX_SAMPLES];
- uint64_t min_ns[MAX_SAMPLES], max_ns[MAX_SAMPLES];
- int i;
-
- populate_sample(max_tck, max_ns, rmax_diff, max_dst, *req_tck, clk_freq,
- tck_ns, MAX_SAMPLES, true);
- sort_multi_array(rmax_diff, max_dst, max_tck, max_ns, MAX_SAMPLES);
-
- populate_sample(min_tck, min_ns, rmin_diff, min_dst, *req_tck, clk_freq,
- tck_ns, MAX_SAMPLES, false);
- sort_multi_array(rmin_diff, min_dst, min_tck, min_ns, MAX_SAMPLES);
-
- for (i = 0; i < MAX_SAMPLES; i++) {
- if (min_dst[i] < max_dst[i] && min_tck[i] > m_tck &&
- (max_tmo / min_ns[i]) <=
- (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
- *req_tck = min_tck[i];
- *req_ns = min_ns[i];
- break;
- } else if ((max_tmo / max_ns[i]) <
- (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
- *req_tck = max_tck[i];
- *req_ns = max_ns[i];
- break;
- }
- }
-}
-
static int
cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
{
@@ -263,27 +189,7 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
goto tim_hw_free;
}
- tim_ring->tck_nsec =
- round(RTE_ALIGN_MUL_NEAR((long double)rcfg->timer_tick_ns,
- cnxk_tim_ns_per_tck(clk_freq)));
- if (log10(clk_freq) - floor(log10(clk_freq)) != 0.0) {
- uint64_t req_ns, req_tck;
- double tck_ns;
-
- req_ns = tim_ring->tck_nsec;
- tck_ns = NSECPERSEC / clk_freq;
- req_tck = round(rcfg->timer_tick_ns / tck_ns);
- tim_adjust_resolution(&req_ns, &req_tck, tck_ns, clk_freq,
- rcfg->max_tmo_ns, min_intvl_cyc);
- if ((tim_ring->tck_nsec != req_ns) &&
- !(rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) {
- rc = -ERANGE;
- goto tim_hw_free;
- }
- tim_ring->tck_nsec = ceil(req_tck * tck_ns);
- }
-
- tim_ring->tck_int = round((long double)tim_ring->tck_nsec /
+ tim_ring->tck_int = round((double)rcfg->timer_tick_ns /
cnxk_tim_ns_per_tck(clk_freq));
tim_ring->tck_nsec =
ceil(tim_ring->tck_int * cnxk_tim_ns_per_tck(clk_freq));
@@ -296,6 +202,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
tim_ring->chunk_sz = dev->chunk_sz;
tim_ring->disable_npa = dev->disable_npa;
tim_ring->enable_stats = dev->enable_stats;
+ tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
+ tim_ring->tbase = cnxk_tim_get_tick_base(clk_src, tim_ring->base);
+
+ if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI))
+ tim_ring->tick_fn = cnxk_tim_cntvct;
+ else
+ tim_ring->tick_fn = cnxk_tim_tick_read;
for (i = 0; i < dev->ring_ctl_cnt; i++) {
struct cnxk_tim_ctl *ring_ctl = &dev->ring_ctl_data[i];
@@ -342,7 +255,6 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
goto tim_chnk_free;
}
- tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
@@ -391,31 +303,6 @@ cnxk_tim_ring_free(struct rte_event_timer_adapter *adptr)
return 0;
}
-static void
-cnxk_tim_calibrate_start_tsc(struct cnxk_tim_ring *tim_ring)
-{
-#define CNXK_TIM_CALIB_ITER 1E6
- uint32_t real_bkt, bucket;
- int icount, ecount = 0;
- uint64_t bkt_cyc;
-
- for (icount = 0; icount < CNXK_TIM_CALIB_ITER; icount++) {
- real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
- bkt_cyc = cnxk_tim_cntvct();
- bucket = (bkt_cyc - tim_ring->ring_start_cyc) /
- tim_ring->tck_int;
- bucket = bucket % (tim_ring->nb_bkts);
- tim_ring->ring_start_cyc =
- bkt_cyc - (real_bkt * tim_ring->tck_int);
- if (bucket != real_bkt)
- ecount++;
- }
- tim_ring->last_updt_cyc = bkt_cyc;
- plt_tim_dbg("Bucket mispredict %3.2f distance %d\n",
- 100 - (((double)(icount - ecount) / (double)icount) * 100),
- bucket - real_bkt);
-}
-
static int
cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
{
@@ -431,12 +318,16 @@ cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
if (rc < 0)
return rc;
- tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts;
tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
tim_ring->fast_bkt = rte_reciprocal_value_u64(tim_ring->nb_bkts);
- cnxk_tim_calibrate_start_tsc(tim_ring);
+ if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI)) {
+ uint64_t start_diff;
+ start_diff = cnxk_tim_cntvct(tim_ring->tbase) -
+ cnxk_tim_tick_read(tim_ring->tbase);
+ tim_ring->ring_start_cyc += start_diff;
+ }
return rc;
}
@@ -462,7 +353,8 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
struct rte_event_timer_adapter_stats *stats)
{
struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;
- uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
+ uint64_t bkt_cyc =
+ tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
stats->evtim_exp_count =
__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 91a90ee2ce..0fda9f4f13 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -24,14 +24,14 @@
#define CNXK_TIM_EVDEV_NAME cnxk_tim_eventdev
#define CNXK_TIM_MAX_BUCKETS (0xFFFFF)
-#define CNXK_TIM_RING_DEF_CHUNK_SZ (4096)
+#define CNXK_TIM_RING_DEF_CHUNK_SZ (256)
#define CNXK_TIM_CHUNK_ALIGNMENT (16)
#define CNXK_TIM_MAX_BURST \
(RTE_CACHE_LINE_SIZE / CNXK_TIM_CHUNK_ALIGNMENT)
#define CNXK_TIM_NB_CHUNK_SLOTS(sz) (((sz) / CNXK_TIM_CHUNK_ALIGNMENT) - 1)
#define CNXK_TIM_MIN_CHUNK_SLOTS (0x1)
#define CNXK_TIM_MAX_CHUNK_SLOTS (0x1FFE)
-#define CNXK_TIM_MAX_POOL_CACHE_SZ (128)
+#define CNXK_TIM_MAX_POOL_CACHE_SZ (16)
#define CN9K_TIM_MIN_TMO_TKS (256)
@@ -119,18 +119,18 @@ struct cnxk_tim_bkt {
};
struct cnxk_tim_ring {
- uintptr_t base;
uint16_t nb_chunk_slots;
uint32_t nb_bkts;
- uint64_t last_updt_cyc;
+ uintptr_t tbase;
+ uint64_t (*tick_fn)(uint64_t tbase);
uint64_t ring_start_cyc;
- uint64_t tck_int;
- uint64_t tot_int;
struct cnxk_tim_bkt *bkt;
struct rte_mempool *chunk_pool;
struct rte_reciprocal_u64 fast_div;
struct rte_reciprocal_u64 fast_bkt;
+ uint64_t tck_int;
uint64_t arm_cnt;
+ uintptr_t base;
uint8_t prod_type_sp;
uint8_t enable_stats;
uint8_t disable_npa;
@@ -163,19 +163,19 @@ cnxk_tim_priv_get(void)
return mz->addr;
}
-static inline long double
+static inline double
cnxk_tim_ns_per_tck(uint64_t freq)
{
- return (long double)NSECPERSEC / freq;
+ return (double)NSECPERSEC / freq;
}
#ifdef RTE_ARCH_ARM64
static inline uint64_t
-cnxk_tim_cntvct(void)
+cnxk_tim_cntvct(uint64_t base __rte_unused)
{
uint64_t tsc;
- asm volatile("mrs %0, cntvct_el0" : "=r"(tsc));
+ asm volatile("mrs %0, CNTVCT_EL0" : "=r"(tsc)::"memory");
return tsc;
}
@@ -189,7 +189,7 @@ cnxk_tim_cntfrq(void)
}
#else
static inline uint64_t
-cnxk_tim_cntvct(void)
+cnxk_tim_cntvct(uint64_t base __rte_unused)
{
return 0;
}
@@ -201,6 +201,12 @@ cnxk_tim_cntfrq(void)
}
#endif
+static inline uint64_t
+cnxk_tim_tick_read(uint64_t tick_base)
+{
+ return plt_read64(tick_base);
+}
+
static inline enum roc_tim_clk_src
cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
{
@@ -221,6 +227,27 @@ cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
}
}
+static inline uintptr_t
+cnxk_tim_get_tick_base(enum roc_tim_clk_src clk_src, uintptr_t base)
+{
+ switch (clk_src) {
+ case ROC_TIM_CLK_SRC_GTI:
+ return base + TIM_LF_FR_RN_GTI;
+ case ROC_TIM_CLK_SRC_GPIO:
+ return base + TIM_LF_FR_RN_GPIOS;
+ case ROC_TIM_CLK_SRC_10NS:
+ return base + TIM_LF_FR_RN_TENNS;
+ case ROC_TIM_CLK_SRC_PTP:
+ return base + TIM_LF_FR_RN_PTP;
+ case ROC_TIM_CLK_SRC_SYNCE:
+ return base + TIM_LF_FR_RN_SYNCE;
+ case ROC_TIM_CLK_SRC_BTS:
+ return base + TIM_LF_FR_RN_BTS;
+ default:
+ return ROC_TIM_CLK_SRC_INVALID;
+ }
+}
+
static inline int
cnxk_tim_get_clk_freq(struct cnxk_tim_evdev *dev, enum roc_tim_clk_src clk_src,
uint64_t *freq)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index dfcfbdc797..923a72093b 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -39,22 +39,6 @@ cnxk_tim_format_event(const struct rte_event_timer *const tim,
entry->wqe = tim->ev.u64;
}
-static inline void
-cnxk_tim_sync_start_cyc(struct cnxk_tim_ring *tim_ring)
-{
- uint64_t cur_cyc = cnxk_tim_cntvct();
- uint32_t real_bkt;
-
- if (cur_cyc - tim_ring->last_updt_cyc > tim_ring->tot_int) {
- real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
- cur_cyc = cnxk_tim_cntvct();
-
- tim_ring->ring_start_cyc =
- cur_cyc - (real_bkt * tim_ring->tck_int);
- tim_ring->last_updt_cyc = cur_cyc;
- }
-}
-
static __rte_always_inline uint16_t
cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -65,7 +49,6 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
uint16_t index;
int ret = 0;
- cnxk_tim_sync_start_cyc(tim_ring);
for (index = 0; index < nb_timers; index++) {
if (cnxk_tim_arm_checks(tim_ring, tim[index]))
break;
@@ -127,7 +110,6 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
return 0;
}
- cnxk_tim_sync_start_cyc(tim_ring);
while (arr_idx < nb_timers) {
for (idx = 0; idx < CNXK_TIM_MAX_BURST && (arr_idx < nb_timers);
idx++, arr_idx++) {
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index 0c9f29cfbe..8d8ed1d3a1 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -131,7 +131,8 @@ cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
struct cnxk_tim_bkt **mirr_bkt)
{
- const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
+ const uint64_t bkt_cyc =
+ tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
uint64_t bucket =
rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
rel_bkt;
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 3/3] event/cnxk: disable timer resolution estimation
2022-09-21 6:15 ` [PATCH v2 3/3] event/cnxk: disable timer resolution estimation pbhagavatula
@ 2022-09-27 11:01 ` Jerin Jacob
0 siblings, 0 replies; 8+ messages in thread
From: Jerin Jacob @ 2022-09-27 11:01 UTC (permalink / raw)
To: pbhagavatula
Cc: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Shijith Thotton, dev
On Wed, Sep 21, 2022 at 11:48 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Disable timer resolution estimation, read TIM LF clock registers
> to get the current running clock counter as estimating causes
> time drift.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Applied to dpdk-next-net-eventdev/for-main. Thanks
> ---
> drivers/common/cnxk/hw/tim.h | 6 ++
> drivers/event/cnxk/cnxk_tim_evdev.c | 140 +++------------------------
> drivers/event/cnxk/cnxk_tim_evdev.h | 49 +++++++---
> drivers/event/cnxk/cnxk_tim_worker.c | 18 ----
> drivers/event/cnxk/cnxk_tim_worker.h | 3 +-
> 5 files changed, 62 insertions(+), 154 deletions(-)
>
> diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
> index a0fe29ddcf..61c38ae175 100644
> --- a/drivers/common/cnxk/hw/tim.h
> +++ b/drivers/common/cnxk/hw/tim.h
> @@ -31,6 +31,12 @@
> #define TIM_LF_INT_VEC_NRSPERR_INT (0x0ull)
> #define TIM_LF_INT_VEC_RAS_INT (0x1ull)
> #define TIM_LF_RING_AURA (0x0)
> +#define TIM_LF_FR_RN_GPIOS (0x020)
> +#define TIM_LF_FR_RN_GTI (0x030)
> +#define TIM_LF_FR_RN_PTP (0x040)
> +#define TIM_LF_FR_RN_TENNS (0x050)
> +#define TIM_LF_FR_RN_SYNCE (0x060)
> +#define TIM_LF_FR_RN_BTS (0x070)
> #define TIM_LF_RING_BASE (0x130)
> #define TIM_LF_NRSPERR_INT (0x200)
> #define TIM_LF_NRSPERR_INT_W1S (0x208)
> diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
> index af67235503..f8a536e71a 100644
> --- a/drivers/event/cnxk/cnxk_tim_evdev.c
> +++ b/drivers/event/cnxk/cnxk_tim_evdev.c
> @@ -119,80 +119,6 @@ cnxk_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
> sizeof(struct rte_event_timer_adapter_conf));
> }
>
> -static inline void
> -sort_multi_array(double ref_arr[], uint64_t arr1[], uint64_t arr2[],
> - uint64_t arr3[], uint8_t sz)
> -{
> - int x;
> -
> - for (x = 0; x < sz - 1; x++) {
> - if (ref_arr[x] > ref_arr[x + 1]) {
> - PLT_SWAP(ref_arr[x], ref_arr[x + 1]);
> - PLT_SWAP(arr1[x], arr1[x + 1]);
> - PLT_SWAP(arr2[x], arr2[x + 1]);
> - PLT_SWAP(arr3[x], arr3[x + 1]);
> - x = -1;
> - }
> - }
> -}
> -
> -static inline void
> -populate_sample(uint64_t tck[], uint64_t ns[], double diff[], uint64_t dst[],
> - uint64_t req_tck, uint64_t clk_freq, double tck_ns, uint8_t sz,
> - bool mov_fwd)
> -{
> - int i;
> -
> - for (i = 0; i < sz; i++) {
> - tck[i] = i ? tck[i - 1] : req_tck;
> - do {
> - mov_fwd ? tck[i]++ : tck[i]--;
> - ns[i] = round((double)tck[i] * tck_ns);
> - if (round((double)tck[i] * tck_ns) >
> - ((double)tck[i] * tck_ns))
> - continue;
> - } while (ns[i] % (uint64_t)cnxk_tim_ns_per_tck(clk_freq));
> - diff[i] = PLT_MAX((double)ns[i], (double)tck[i] * tck_ns) -
> - PLT_MIN((double)ns[i], (double)tck[i] * tck_ns);
> - dst[i] = mov_fwd ? tck[i] - req_tck : req_tck - tck[i];
> - }
> -}
> -
> -static void
> -tim_adjust_resolution(uint64_t *req_ns, uint64_t *req_tck, double tck_ns,
> - uint64_t clk_freq, uint64_t max_tmo, uint64_t m_tck)
> -{
> -#define MAX_SAMPLES 5
> - double rmax_diff[MAX_SAMPLES], rmin_diff[MAX_SAMPLES];
> - uint64_t min_tck[MAX_SAMPLES], max_tck[MAX_SAMPLES];
> - uint64_t min_dst[MAX_SAMPLES], max_dst[MAX_SAMPLES];
> - uint64_t min_ns[MAX_SAMPLES], max_ns[MAX_SAMPLES];
> - int i;
> -
> - populate_sample(max_tck, max_ns, rmax_diff, max_dst, *req_tck, clk_freq,
> - tck_ns, MAX_SAMPLES, true);
> - sort_multi_array(rmax_diff, max_dst, max_tck, max_ns, MAX_SAMPLES);
> -
> - populate_sample(min_tck, min_ns, rmin_diff, min_dst, *req_tck, clk_freq,
> - tck_ns, MAX_SAMPLES, false);
> - sort_multi_array(rmin_diff, min_dst, min_tck, min_ns, MAX_SAMPLES);
> -
> - for (i = 0; i < MAX_SAMPLES; i++) {
> - if (min_dst[i] < max_dst[i] && min_tck[i] > m_tck &&
> - (max_tmo / min_ns[i]) <=
> - (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
> - *req_tck = min_tck[i];
> - *req_ns = min_ns[i];
> - break;
> - } else if ((max_tmo / max_ns[i]) <
> - (TIM_MAX_BUCKET_SIZE - TIM_MIN_BUCKET_SIZE)) {
> - *req_tck = max_tck[i];
> - *req_ns = max_ns[i];
> - break;
> - }
> - }
> -}
> -
> static int
> cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
> {
> @@ -263,27 +189,7 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
> goto tim_hw_free;
> }
>
> - tim_ring->tck_nsec =
> - round(RTE_ALIGN_MUL_NEAR((long double)rcfg->timer_tick_ns,
> - cnxk_tim_ns_per_tck(clk_freq)));
> - if (log10(clk_freq) - floor(log10(clk_freq)) != 0.0) {
> - uint64_t req_ns, req_tck;
> - double tck_ns;
> -
> - req_ns = tim_ring->tck_nsec;
> - tck_ns = NSECPERSEC / clk_freq;
> - req_tck = round(rcfg->timer_tick_ns / tck_ns);
> - tim_adjust_resolution(&req_ns, &req_tck, tck_ns, clk_freq,
> - rcfg->max_tmo_ns, min_intvl_cyc);
> - if ((tim_ring->tck_nsec != req_ns) &&
> - !(rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) {
> - rc = -ERANGE;
> - goto tim_hw_free;
> - }
> - tim_ring->tck_nsec = ceil(req_tck * tck_ns);
> - }
> -
> - tim_ring->tck_int = round((long double)tim_ring->tck_nsec /
> + tim_ring->tck_int = round((double)rcfg->timer_tick_ns /
> cnxk_tim_ns_per_tck(clk_freq));
> tim_ring->tck_nsec =
> ceil(tim_ring->tck_int * cnxk_tim_ns_per_tck(clk_freq));
> @@ -296,6 +202,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
> tim_ring->chunk_sz = dev->chunk_sz;
> tim_ring->disable_npa = dev->disable_npa;
> tim_ring->enable_stats = dev->enable_stats;
> + tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
> + tim_ring->tbase = cnxk_tim_get_tick_base(clk_src, tim_ring->base);
> +
> + if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI))
> + tim_ring->tick_fn = cnxk_tim_cntvct;
> + else
> + tim_ring->tick_fn = cnxk_tim_tick_read;
>
> for (i = 0; i < dev->ring_ctl_cnt; i++) {
> struct cnxk_tim_ctl *ring_ctl = &dev->ring_ctl_data[i];
> @@ -342,7 +255,6 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
> goto tim_chnk_free;
> }
>
> - tim_ring->base = roc_tim_lf_base_get(&dev->tim, tim_ring->ring_id);
> plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
> plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
>
> @@ -391,31 +303,6 @@ cnxk_tim_ring_free(struct rte_event_timer_adapter *adptr)
> return 0;
> }
>
> -static void
> -cnxk_tim_calibrate_start_tsc(struct cnxk_tim_ring *tim_ring)
> -{
> -#define CNXK_TIM_CALIB_ITER 1E6
> - uint32_t real_bkt, bucket;
> - int icount, ecount = 0;
> - uint64_t bkt_cyc;
> -
> - for (icount = 0; icount < CNXK_TIM_CALIB_ITER; icount++) {
> - real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
> - bkt_cyc = cnxk_tim_cntvct();
> - bucket = (bkt_cyc - tim_ring->ring_start_cyc) /
> - tim_ring->tck_int;
> - bucket = bucket % (tim_ring->nb_bkts);
> - tim_ring->ring_start_cyc =
> - bkt_cyc - (real_bkt * tim_ring->tck_int);
> - if (bucket != real_bkt)
> - ecount++;
> - }
> - tim_ring->last_updt_cyc = bkt_cyc;
> - plt_tim_dbg("Bucket mispredict %3.2f distance %d\n",
> - 100 - (((double)(icount - ecount) / (double)icount) * 100),
> - bucket - real_bkt);
> -}
> -
> static int
> cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
> {
> @@ -431,12 +318,16 @@ cnxk_tim_ring_start(const struct rte_event_timer_adapter *adptr)
> if (rc < 0)
> return rc;
>
> - tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts;
> tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
> tim_ring->fast_bkt = rte_reciprocal_value_u64(tim_ring->nb_bkts);
>
> - cnxk_tim_calibrate_start_tsc(tim_ring);
> + if (roc_model_is_cn9k() && (tim_ring->clk_src == ROC_TIM_CLK_SRC_GTI)) {
> + uint64_t start_diff;
>
> + start_diff = cnxk_tim_cntvct(tim_ring->tbase) -
> + cnxk_tim_tick_read(tim_ring->tbase);
> + tim_ring->ring_start_cyc += start_diff;
> + }
> return rc;
> }
>
> @@ -462,7 +353,8 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
> struct rte_event_timer_adapter_stats *stats)
> {
> struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;
> - uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
> + uint64_t bkt_cyc =
> + tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
>
> stats->evtim_exp_count =
> __atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
> diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
> index 91a90ee2ce..0fda9f4f13 100644
> --- a/drivers/event/cnxk/cnxk_tim_evdev.h
> +++ b/drivers/event/cnxk/cnxk_tim_evdev.h
> @@ -24,14 +24,14 @@
>
> #define CNXK_TIM_EVDEV_NAME cnxk_tim_eventdev
> #define CNXK_TIM_MAX_BUCKETS (0xFFFFF)
> -#define CNXK_TIM_RING_DEF_CHUNK_SZ (4096)
> +#define CNXK_TIM_RING_DEF_CHUNK_SZ (256)
> #define CNXK_TIM_CHUNK_ALIGNMENT (16)
> #define CNXK_TIM_MAX_BURST \
> (RTE_CACHE_LINE_SIZE / CNXK_TIM_CHUNK_ALIGNMENT)
> #define CNXK_TIM_NB_CHUNK_SLOTS(sz) (((sz) / CNXK_TIM_CHUNK_ALIGNMENT) - 1)
> #define CNXK_TIM_MIN_CHUNK_SLOTS (0x1)
> #define CNXK_TIM_MAX_CHUNK_SLOTS (0x1FFE)
> -#define CNXK_TIM_MAX_POOL_CACHE_SZ (128)
> +#define CNXK_TIM_MAX_POOL_CACHE_SZ (16)
>
> #define CN9K_TIM_MIN_TMO_TKS (256)
>
> @@ -119,18 +119,18 @@ struct cnxk_tim_bkt {
> };
>
> struct cnxk_tim_ring {
> - uintptr_t base;
> uint16_t nb_chunk_slots;
> uint32_t nb_bkts;
> - uint64_t last_updt_cyc;
> + uintptr_t tbase;
> + uint64_t (*tick_fn)(uint64_t tbase);
> uint64_t ring_start_cyc;
> - uint64_t tck_int;
> - uint64_t tot_int;
> struct cnxk_tim_bkt *bkt;
> struct rte_mempool *chunk_pool;
> struct rte_reciprocal_u64 fast_div;
> struct rte_reciprocal_u64 fast_bkt;
> + uint64_t tck_int;
> uint64_t arm_cnt;
> + uintptr_t base;
> uint8_t prod_type_sp;
> uint8_t enable_stats;
> uint8_t disable_npa;
> @@ -163,19 +163,19 @@ cnxk_tim_priv_get(void)
> return mz->addr;
> }
>
> -static inline long double
> +static inline double
> cnxk_tim_ns_per_tck(uint64_t freq)
> {
> - return (long double)NSECPERSEC / freq;
> + return (double)NSECPERSEC / freq;
> }
>
> #ifdef RTE_ARCH_ARM64
> static inline uint64_t
> -cnxk_tim_cntvct(void)
> +cnxk_tim_cntvct(uint64_t base __rte_unused)
> {
> uint64_t tsc;
>
> - asm volatile("mrs %0, cntvct_el0" : "=r"(tsc));
> + asm volatile("mrs %0, CNTVCT_EL0" : "=r"(tsc)::"memory");
> return tsc;
> }
>
> @@ -189,7 +189,7 @@ cnxk_tim_cntfrq(void)
> }
> #else
> static inline uint64_t
> -cnxk_tim_cntvct(void)
> +cnxk_tim_cntvct(uint64_t base __rte_unused)
> {
> return 0;
> }
> @@ -201,6 +201,12 @@ cnxk_tim_cntfrq(void)
> }
> #endif
>
> +static inline uint64_t
> +cnxk_tim_tick_read(uint64_t tick_base)
> +{
> + return plt_read64(tick_base);
> +}
> +
> static inline enum roc_tim_clk_src
> cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
> {
> @@ -221,6 +227,27 @@ cnxk_tim_convert_clk_src(enum rte_event_timer_adapter_clk_src clk_src)
> }
> }
>
> +static inline uintptr_t
> +cnxk_tim_get_tick_base(enum roc_tim_clk_src clk_src, uintptr_t base)
> +{
> + switch (clk_src) {
> + case ROC_TIM_CLK_SRC_GTI:
> + return base + TIM_LF_FR_RN_GTI;
> + case ROC_TIM_CLK_SRC_GPIO:
> + return base + TIM_LF_FR_RN_GPIOS;
> + case ROC_TIM_CLK_SRC_10NS:
> + return base + TIM_LF_FR_RN_TENNS;
> + case ROC_TIM_CLK_SRC_PTP:
> + return base + TIM_LF_FR_RN_PTP;
> + case ROC_TIM_CLK_SRC_SYNCE:
> + return base + TIM_LF_FR_RN_SYNCE;
> + case ROC_TIM_CLK_SRC_BTS:
> + return base + TIM_LF_FR_RN_BTS;
> + default:
> + return ROC_TIM_CLK_SRC_INVALID;
> + }
> +}
> +
> static inline int
> cnxk_tim_get_clk_freq(struct cnxk_tim_evdev *dev, enum roc_tim_clk_src clk_src,
> uint64_t *freq)
> diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
> index dfcfbdc797..923a72093b 100644
> --- a/drivers/event/cnxk/cnxk_tim_worker.c
> +++ b/drivers/event/cnxk/cnxk_tim_worker.c
> @@ -39,22 +39,6 @@ cnxk_tim_format_event(const struct rte_event_timer *const tim,
> entry->wqe = tim->ev.u64;
> }
>
> -static inline void
> -cnxk_tim_sync_start_cyc(struct cnxk_tim_ring *tim_ring)
> -{
> - uint64_t cur_cyc = cnxk_tim_cntvct();
> - uint32_t real_bkt;
> -
> - if (cur_cyc - tim_ring->last_updt_cyc > tim_ring->tot_int) {
> - real_bkt = plt_read64(tim_ring->base + TIM_LF_RING_REL) >> 44;
> - cur_cyc = cnxk_tim_cntvct();
> -
> - tim_ring->ring_start_cyc =
> - cur_cyc - (real_bkt * tim_ring->tck_int);
> - tim_ring->last_updt_cyc = cur_cyc;
> - }
> -}
> -
> static __rte_always_inline uint16_t
> cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
> struct rte_event_timer **tim, const uint16_t nb_timers,
> @@ -65,7 +49,6 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
> uint16_t index;
> int ret = 0;
>
> - cnxk_tim_sync_start_cyc(tim_ring);
> for (index = 0; index < nb_timers; index++) {
> if (cnxk_tim_arm_checks(tim_ring, tim[index]))
> break;
> @@ -127,7 +110,6 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
> return 0;
> }
>
> - cnxk_tim_sync_start_cyc(tim_ring);
> while (arr_idx < nb_timers) {
> for (idx = 0; idx < CNXK_TIM_MAX_BURST && (arr_idx < nb_timers);
> idx++, arr_idx++) {
> diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
> index 0c9f29cfbe..8d8ed1d3a1 100644
> --- a/drivers/event/cnxk/cnxk_tim_worker.h
> +++ b/drivers/event/cnxk/cnxk_tim_worker.h
> @@ -131,7 +131,8 @@ cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
> const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
> struct cnxk_tim_bkt **mirr_bkt)
> {
> - const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
> + const uint64_t bkt_cyc =
> + tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;
> uint64_t bucket =
> rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
> rel_bkt;
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-09-27 11:01 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-19 11:11 [PATCH 1/4] cnxk/net: add fc check in vector event Tx path pbhagavatula
2022-07-19 11:11 ` [PATCH 2/4] event/cnxk: avoid reading non cached registers pbhagavatula
2022-09-21 6:15 ` [PATCH v2 1/3] " pbhagavatula
2022-09-21 6:15 ` [PATCH v2 2/3] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
2022-09-21 6:15 ` [PATCH v2 3/3] event/cnxk: disable timer resolution estimation pbhagavatula
2022-09-27 11:01 ` Jerin Jacob
2022-07-19 11:11 ` [PATCH 3/4] event/cnxk: set dequeue mode to prefetch with wait pbhagavatula
2022-07-19 11:11 ` [PATCH 4/4] event/cnxk: disable timer resolution estimation pbhagavatula
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).