From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix HW race condition observed when timeout resolution is low (<5us). When HW traverses a given TIM bucket it will clear chunk_remainder, but since SW always decreases the chunk_remainder at the start of the arm routine it might cause a race where SW updates chunk_remainder after HW has cleared it that lead to nasty side effects. Fixes: 95e4e4ec7469 ("event/octeontx2: add timer arm timeout burst") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_tim_worker.h | 141 +++++++++++++++++++--- 1 file changed, 124 insertions(+), 17 deletions(-) diff --git a/drivers/event/octeontx2/otx2_tim_worker.h b/drivers/event/octeontx2/otx2_tim_worker.h index 50db6543c..c896b5433 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.h +++ b/drivers/event/octeontx2/otx2_tim_worker.h @@ -7,6 +7,13 @@ #include "otx2_tim_evdev.h" +static inline uint8_t +tim_bkt_fetch_lock(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_LOCK) & + TIM_BUCKET_W1_M_LOCK; +} + static inline int16_t tim_bkt_fetch_rem(uint64_t w1) { @@ -188,7 +195,6 @@ tim_insert_chunk(struct otx2_tim_bkt * const bkt, } else { bkt->first_chunk = (uintptr_t)chunk; } - return chunk; } @@ -208,11 +214,38 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, __retry: /* Get Bucket sema*/ - lock_sema = tim_bkt_fetch_sema(bkt); + lock_sema = tim_bkt_fetch_sema_lock(bkt); /* Bucket related checks. */ - if (unlikely(tim_bkt_get_hbt(lock_sema))) - goto __retry; + if (unlikely(tim_bkt_get_hbt(lock_sema))) { + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } + } /* Insert the work. */ rem = tim_bkt_fetch_rem(lock_sema); @@ -224,14 +257,15 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, chunk = tim_insert_chunk(bkt, tim_ring); if (unlikely(chunk == NULL)) { - tim_bkt_set_rem(bkt, 0); + bkt->chunk_remainder = 0; + tim_bkt_dec_lock(bkt); tim->impl_opaque[0] = 0; tim->impl_opaque[1] = 0; tim->state = RTE_EVENT_TIMER_ERROR; return -ENOMEM; } bkt->current_chunk = (uintptr_t)chunk; - tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - 1); + bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1; } else { chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk; chunk += tim_ring->nb_chunk_slots - rem; @@ -241,6 +275,7 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, *chunk = *pent; tim_bkt_inc_nent(bkt); + tim_bkt_dec_lock(bkt); tim->impl_opaque[0] = (uintptr_t)chunk; tim->impl_opaque[1] = (uintptr_t)bkt; @@ -263,19 +298,60 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, __retry: bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags); - /* Get Bucket sema*/ lock_sema = tim_bkt_fetch_sema_lock(bkt); /* Bucket related checks. */ if (unlikely(tim_bkt_get_hbt(lock_sema))) { - tim_bkt_dec_lock(bkt); - goto __retry; + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } } rem = tim_bkt_fetch_rem(lock_sema); - if (rem < 0) { +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxrh %w[rem], [%[crem]] \n" + " tbz %w[rem], 15, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxrh %w[rem], [%[crem]] \n" + " tbnz %w[rem], 15, rty%= \n" + "dne%=: \n" + : [rem] "=&r" (rem) + : [crem] "r" (&bkt->chunk_remainder) + : "memory" + ); +#else + while (__atomic_load_n(&bkt->chunk_remainder, + __ATOMIC_ACQUIRE) < 0) + ; +#endif /* Goto diff bucket. */ tim_bkt_dec_lock(bkt); goto __retry; @@ -294,17 +370,23 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, tim->state = RTE_EVENT_TIMER_ERROR; return -ENOMEM; } - bkt->current_chunk = (uintptr_t)chunk; - tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - 1); + *chunk = *pent; + while (tim_bkt_fetch_lock(lock_sema) != + (-tim_bkt_fetch_rem(lock_sema))) + lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE); + + bkt->current_chunk = (uintptr_t)chunk; + __atomic_store_n(&bkt->chunk_remainder, + tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE); } else { - chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk; + chunk = (struct otx2_tim_ent *)bkt->current_chunk; chunk += tim_ring->nb_chunk_slots - rem; + *chunk = *pent; } /* Copy work entry. */ - *chunk = *pent; - tim_bkt_dec_lock(bkt); tim_bkt_inc_nent(bkt); + tim_bkt_dec_lock(bkt); tim->impl_opaque[0] = (uintptr_t)chunk; tim->impl_opaque[1] = (uintptr_t)bkt; tim->state = RTE_EVENT_TIMER_ARMED; @@ -360,8 +442,33 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, /* Bucket related checks. */ if (unlikely(tim_bkt_get_hbt(lock_sema))) { - tim_bkt_dec_lock(bkt); - goto __retry; + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } } chunk_remainder = tim_bkt_fetch_rem(lock_sema); -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Since TIM buckets are always aligned to 32B and our cache line size being 128B, we will always have a cache miss when reading current_chunk pointer. Avoid the cache miss by storing the current_chunk pointer in the bucket opposite to the current bucket. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_tim_worker.h | 69 ++++++++++++++--------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/drivers/event/octeontx2/otx2_tim_worker.h b/drivers/event/octeontx2/otx2_tim_worker.h index c896b5433..7b771fbca 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.h +++ b/drivers/event/octeontx2/otx2_tim_worker.h @@ -115,20 +115,29 @@ tim_bkt_clr_nent(struct otx2_tim_bkt *bktp) return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL); } -static __rte_always_inline struct otx2_tim_bkt * +static __rte_always_inline void tim_get_target_bucket(struct otx2_tim_ring * const tim_ring, - const uint32_t rel_bkt, const uint8_t flag) + const uint32_t rel_bkt, struct otx2_tim_bkt **bkt, + struct otx2_tim_bkt **mirr_bkt, const uint8_t flag) { const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc; uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) + rel_bkt; + uint32_t mirr_bucket = 0; - if (flag & OTX2_TIM_BKT_MOD) + if (flag & OTX2_TIM_BKT_MOD) { bucket = bucket % tim_ring->nb_bkts; - if (flag & OTX2_TIM_BKT_AND) + mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) % + tim_ring->nb_bkts; + } + if (flag & OTX2_TIM_BKT_AND) { bucket = bucket & (tim_ring->nb_bkts - 1); + mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) & + (tim_ring->nb_bkts - 1); + } - return &tim_ring->bkt[bucket]; + *bkt = &tim_ring->bkt[bucket]; + *mirr_bkt = &tim_ring->bkt[mirr_bucket]; } static struct otx2_tim_ent * @@ -153,6 +162,7 @@ tim_clr_bkt(struct otx2_tim_ring * const tim_ring, static struct otx2_tim_ent * tim_refill_chunk(struct otx2_tim_bkt * const bkt, + struct otx2_tim_bkt * const mirr_bkt, struct otx2_tim_ring * const tim_ring) { struct otx2_tim_ent *chunk; @@ -162,8 +172,8 @@ tim_refill_chunk(struct otx2_tim_bkt * const bkt, (void **)&chunk))) return NULL; if (bkt->nb_entry) { - *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t) - bkt->current_chunk) + + *(uint64_t *)(((struct otx2_tim_ent *) + mirr_bkt->current_chunk) + tim_ring->nb_chunk_slots) = (uintptr_t)chunk; } else { @@ -180,6 +190,7 @@ tim_refill_chunk(struct otx2_tim_bkt * const bkt, static struct otx2_tim_ent * tim_insert_chunk(struct otx2_tim_bkt * const bkt, + struct otx2_tim_bkt * const mirr_bkt, struct otx2_tim_ring * const tim_ring) { struct otx2_tim_ent *chunk; @@ -190,7 +201,7 @@ tim_insert_chunk(struct otx2_tim_bkt * const bkt, *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0; if (bkt->nb_entry) { *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t) - bkt->current_chunk) + + mirr_bkt->current_chunk) + tim_ring->nb_chunk_slots) = (uintptr_t)chunk; } else { bkt->first_chunk = (uintptr_t)chunk; @@ -205,14 +216,15 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, const struct otx2_tim_ent * const pent, const uint8_t flags) { + struct otx2_tim_bkt *mirr_bkt; struct otx2_tim_ent *chunk; struct otx2_tim_bkt *bkt; uint64_t lock_sema; int16_t rem; - bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags); - __retry: + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + /* Get Bucket sema*/ lock_sema = tim_bkt_fetch_sema_lock(bkt); @@ -232,7 +244,7 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, : [hbt] "=&r" (hbt_state) : [w1] "r" ((&bkt->w1)) : "memory" - ); + ); #else do { hbt_state = __atomic_load_n(&bkt->w1, @@ -246,15 +258,14 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, } } } - /* Insert the work. */ rem = tim_bkt_fetch_rem(lock_sema); if (!rem) { if (flags & OTX2_TIM_ENA_FB) - chunk = tim_refill_chunk(bkt, tim_ring); + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); if (flags & OTX2_TIM_ENA_DFB) - chunk = tim_insert_chunk(bkt, tim_ring); + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); if (unlikely(chunk == NULL)) { bkt->chunk_remainder = 0; @@ -264,10 +275,10 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, tim->state = RTE_EVENT_TIMER_ERROR; return -ENOMEM; } - bkt->current_chunk = (uintptr_t)chunk; + mirr_bkt->current_chunk = (uintptr_t)chunk; bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1; } else { - chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk; + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; chunk += tim_ring->nb_chunk_slots - rem; } @@ -291,13 +302,14 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, const struct otx2_tim_ent * const pent, const uint8_t flags) { + struct otx2_tim_bkt *mirr_bkt; struct otx2_tim_ent *chunk; struct otx2_tim_bkt *bkt; uint64_t lock_sema; int16_t rem; __retry: - bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags); + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); /* Get Bucket sema*/ lock_sema = tim_bkt_fetch_sema_lock(bkt); @@ -317,7 +329,7 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, : [hbt] "=&r" (hbt_state) : [w1] "r" ((&bkt->w1)) : "memory" - ); + ); #else do { hbt_state = __atomic_load_n(&bkt->w1, @@ -358,9 +370,9 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, } else if (!rem) { /* Only one thread can be here*/ if (flags & OTX2_TIM_ENA_FB) - chunk = tim_refill_chunk(bkt, tim_ring); + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); if (flags & OTX2_TIM_ENA_DFB) - chunk = tim_insert_chunk(bkt, tim_ring); + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); if (unlikely(chunk == NULL)) { tim_bkt_set_rem(bkt, 0); @@ -375,11 +387,11 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, (-tim_bkt_fetch_rem(lock_sema))) lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE); - bkt->current_chunk = (uintptr_t)chunk; + mirr_bkt->current_chunk = (uintptr_t)chunk; __atomic_store_n(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE); } else { - chunk = (struct otx2_tim_ent *)bkt->current_chunk; + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; chunk += tim_ring->nb_chunk_slots - rem; *chunk = *pent; } @@ -420,6 +432,7 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, const uint16_t nb_timers, const uint8_t flags) { struct otx2_tim_ent *chunk = NULL; + struct otx2_tim_bkt *mirr_bkt; struct otx2_tim_bkt *bkt; uint16_t chunk_remainder; uint16_t index = 0; @@ -428,7 +441,7 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, uint8_t lock_cnt; __retry: - bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags); + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); /* Only one thread beyond this. */ lock_sema = tim_bkt_inc_lock(bkt); @@ -477,7 +490,7 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, crem = tim_ring->nb_chunk_slots - chunk_remainder; if (chunk_remainder && crem) { chunk = ((struct otx2_tim_ent *) - (uintptr_t)bkt->current_chunk) + crem; + mirr_bkt->current_chunk) + crem; index = tim_cpy_wrk(index, chunk_remainder, chunk, tim, ents, bkt); @@ -486,9 +499,9 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, } if (flags & OTX2_TIM_ENA_FB) - chunk = tim_refill_chunk(bkt, tim_ring); + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); if (flags & OTX2_TIM_ENA_DFB) - chunk = tim_insert_chunk(bkt, tim_ring); + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); if (unlikely(chunk == NULL)) { tim_bkt_dec_lock(bkt); @@ -497,14 +510,14 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, return crem; } *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0; - bkt->current_chunk = (uintptr_t)chunk; + mirr_bkt->current_chunk = (uintptr_t)chunk; tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt); rem = nb_timers - chunk_remainder; tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem); tim_bkt_add_nent(bkt, rem); } else { - chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk; + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; chunk += (tim_ring->nb_chunk_slots - chunk_remainder); tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt); -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable mempool cache for internal mempool to improve alloc performance. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_tim_evdev.c | 4 ++-- drivers/event/octeontx2/otx2_tim_worker.h | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/event/octeontx2/otx2_tim_evdev.c b/drivers/event/octeontx2/otx2_tim_evdev.c index e8316a6c7..206ed4331 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.c +++ b/drivers/event/octeontx2/otx2_tim_evdev.c @@ -124,6 +124,7 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring, char pool_name[25]; int rc; + cache_sz /= rte_lcore_count(); /* Create chunk pool. */ if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) { mp_flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET; @@ -138,10 +139,9 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring, cache_sz = RTE_MEMPOOL_CACHE_MAX_SIZE; if (!tim_ring->disable_npa) { - /* NPA need not have cache as free is not visible to SW */ tim_ring->chunk_pool = rte_mempool_create_empty(pool_name, tim_ring->nb_chunks, tim_ring->chunk_sz, - 0, 0, rte_socket_id(), mp_flags); + cache_sz, 0, rte_socket_id(), mp_flags); if (tim_ring->chunk_pool == NULL) { otx2_err("Unable to create chunkpool."); diff --git a/drivers/event/octeontx2/otx2_tim_worker.h b/drivers/event/octeontx2/otx2_tim_worker.h index 7b771fbca..af2f864d7 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.h +++ b/drivers/event/octeontx2/otx2_tim_worker.h @@ -144,8 +144,12 @@ static struct otx2_tim_ent * tim_clr_bkt(struct otx2_tim_ring * const tim_ring, struct otx2_tim_bkt * const bkt) { +#define TIM_MAX_OUTSTANDING_OBJ 64 + void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ]; struct otx2_tim_ent *chunk; struct otx2_tim_ent *pnext; + uint8_t objs = 0; + chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk); chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk + @@ -153,10 +157,19 @@ tim_clr_bkt(struct otx2_tim_ring * const tim_ring, while (chunk) { pnext = (struct otx2_tim_ent *)(uintptr_t) ((chunk + tim_ring->nb_chunk_slots)->w0); - rte_mempool_put(tim_ring->chunk_pool, chunk); + if (objs == TIM_MAX_OUTSTANDING_OBJ) { + rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, + objs); + objs = 0; + } + pend_chunks[objs++] = chunk; chunk = pnext; } + if (objs) + rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, + objs); + return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk; } -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Update SSO internal XAQ buffers based on number of timers in event timer adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.h | 6 +- drivers/event/octeontx2/otx2_evdev_adptr.c | 84 +++++++++++++++++----- drivers/event/octeontx2/otx2_tim_evdev.c | 7 +- drivers/event/octeontx2/otx2_tim_evdev.h | 1 + 4 files changed, 74 insertions(+), 24 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 530060f81..231a12a52 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -14,6 +14,7 @@ #include "otx2_dev.h" #include "otx2_ethdev.h" #include "otx2_mempool.h" +#include "otx2_tim_evdev.h" #define EVENTDEV_NAME_OCTEONTX2_PMD otx2_eventdev @@ -137,9 +138,12 @@ struct otx2_sso_evdev { struct rte_mempool *xaq_pool; uint64_t rx_offloads; uint64_t tx_offloads; + uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; - uint32_t adptr_xae_cnt; uint64_t *rx_adptr_pools; + uint16_t tim_adptr_ring_cnt; + uint16_t *timer_adptr_rings; + uint64_t *timer_adptr_sz; /* Dev args */ uint8_t dual_ws; uint8_t selftest; diff --git a/drivers/event/octeontx2/otx2_evdev_adptr.c b/drivers/event/octeontx2/otx2_evdev_adptr.c index d8a06a593..233cba2aa 100644 --- a/drivers/event/octeontx2/otx2_evdev_adptr.c +++ b/drivers/event/octeontx2/otx2_evdev_adptr.c @@ -199,41 +199,87 @@ sso_rxq_disable(struct otx2_eth_dev *dev, uint16_t qid) void sso_updt_xae_cnt(struct otx2_sso_evdev *dev, void *data, uint32_t event_type) { + int i; + switch (event_type) { case RTE_EVENT_TYPE_ETHDEV: { struct otx2_eth_rxq *rxq = data; - int i, match = false; uint64_t *old_ptr; for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { if ((uint64_t)rxq->pool == dev->rx_adptr_pools[i]) - match = true; - } - - if (!match) { - dev->rx_adptr_pool_cnt++; - old_ptr = dev->rx_adptr_pools; - dev->rx_adptr_pools = rte_realloc(dev->rx_adptr_pools, - sizeof(uint64_t) * - dev->rx_adptr_pool_cnt - , 0); - if (dev->rx_adptr_pools == NULL) { - dev->adptr_xae_cnt += rxq->pool->size; - dev->rx_adptr_pools = old_ptr; - dev->rx_adptr_pool_cnt--; return; - } - dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = - (uint64_t)rxq->pool; + } + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc(dev->rx_adptr_pools, + sizeof(uint64_t) * + dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { dev->adptr_xae_cnt += rxq->pool->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->pool; + + dev->adptr_xae_cnt += rxq->pool->size; break; } case RTE_EVENT_TYPE_TIMER: { - dev->adptr_xae_cnt += (*(uint64_t *)data); + struct otx2_tim_ring *timr = data; + uint16_t *old_ring_ptr; + uint64_t *old_sz_ptr; + + for (i = 0; i < dev->tim_adptr_ring_cnt; i++) { + if (timr->ring_id != dev->timer_adptr_rings[i]) + continue; + if (timr->nb_timers == dev->timer_adptr_sz[i]) + return; + dev->adptr_xae_cnt -= dev->timer_adptr_sz[i]; + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_sz[i] = timr->nb_timers; + + return; + } + + dev->tim_adptr_ring_cnt++; + old_ring_ptr = dev->timer_adptr_rings; + old_sz_ptr = dev->timer_adptr_sz; + + dev->timer_adptr_rings = rte_realloc(dev->timer_adptr_rings, + sizeof(uint16_t) * + dev->tim_adptr_ring_cnt, + 0); + if (dev->timer_adptr_rings == NULL) { + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_rings = old_ring_ptr; + dev->tim_adptr_ring_cnt--; + return; + } + + dev->timer_adptr_sz = rte_realloc(dev->timer_adptr_sz, + sizeof(uint64_t) * + dev->tim_adptr_ring_cnt, + 0); + + if (dev->timer_adptr_sz == NULL) { + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_sz = old_sz_ptr; + dev->tim_adptr_ring_cnt--; + return; + } + + dev->timer_adptr_rings[dev->tim_adptr_ring_cnt - 1] = + timr->ring_id; + dev->timer_adptr_sz[dev->tim_adptr_ring_cnt - 1] = + timr->nb_timers; + + dev->adptr_xae_cnt += timr->nb_timers; break; } default: diff --git a/drivers/event/octeontx2/otx2_tim_evdev.c b/drivers/event/octeontx2/otx2_tim_evdev.c index 206ed4331..5f0233f44 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.c +++ b/drivers/event/octeontx2/otx2_tim_evdev.c @@ -254,7 +254,6 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) struct tim_ring_req *free_req; struct tim_lf_alloc_req *req; struct tim_lf_alloc_rsp *rsp; - uint64_t nb_timers; int i, rc; if (dev == NULL) @@ -300,7 +299,7 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) tim_ring->max_tout = rcfg->max_tmo_ns; tim_ring->nb_bkts = (tim_ring->max_tout / tim_ring->tck_nsec); tim_ring->chunk_sz = dev->chunk_sz; - nb_timers = rcfg->nb_timers; + tim_ring->nb_timers = rcfg->nb_timers; tim_ring->disable_npa = dev->disable_npa; tim_ring->enable_stats = dev->enable_stats; @@ -316,7 +315,7 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) } } - tim_ring->nb_chunks = nb_timers / OTX2_TIM_NB_CHUNK_SLOTS( + tim_ring->nb_chunks = tim_ring->nb_timers / OTX2_TIM_NB_CHUNK_SLOTS( tim_ring->chunk_sz); tim_ring->nb_chunk_slots = OTX2_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz); @@ -373,7 +372,7 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) tim_set_fp_ops(tim_ring); /* Update SSO xae count. */ - sso_updt_xae_cnt(sso_pmd_priv(dev->event_dev), (void *)&nb_timers, + sso_updt_xae_cnt(sso_pmd_priv(dev->event_dev), (void *)tim_ring, RTE_EVENT_TYPE_TIMER); sso_xae_reconfigure(dev->event_dev); diff --git a/drivers/event/octeontx2/otx2_tim_evdev.h b/drivers/event/octeontx2/otx2_tim_evdev.h index eec0189c1..f3fe9697a 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.h +++ b/drivers/event/octeontx2/otx2_tim_evdev.h @@ -154,6 +154,7 @@ struct otx2_tim_ring { uint8_t ena_dfb; uint16_t ring_id; uint32_t aura; + uint64_t nb_timers; uint64_t tck_nsec; uint64_t max_tout; uint64_t nb_chunks; -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Update start timestamp periodically to prevent drift. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_tim_evdev.c | 28 +++++++++++++++++++++++ drivers/event/octeontx2/otx2_tim_evdev.h | 7 ++++-- drivers/event/octeontx2/otx2_tim_worker.c | 19 +++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/event/octeontx2/otx2_tim_evdev.c b/drivers/event/octeontx2/otx2_tim_evdev.c index 5f0233f44..b275c6922 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.c +++ b/drivers/event/octeontx2/otx2_tim_evdev.c @@ -389,6 +389,31 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) return rc; } +static void +otx2_tim_calibrate_start_tsc(struct otx2_tim_ring *tim_ring) +{ +#define OTX2_TIM_CALIB_ITER 1E6 + uint32_t real_bkt, bucket; + int icount, ecount = 0; + uint64_t bkt_cyc; + + for (icount = 0; icount < OTX2_TIM_CALIB_ITER; icount++) { + real_bkt = otx2_read64(tim_ring->base + TIM_LF_RING_REL) >> 44; + bkt_cyc = rte_rdtsc(); + bucket = (bkt_cyc - tim_ring->ring_start_cyc) / + tim_ring->tck_int; + bucket = bucket % (tim_ring->nb_bkts); + tim_ring->ring_start_cyc = bkt_cyc - (real_bkt * + tim_ring->tck_int); + if (bucket != real_bkt) + ecount++; + } + tim_ring->last_updt_cyc = bkt_cyc; + otx2_tim_dbg("Bucket mispredict %3.2f distance %d\n", + 100 - (((double)(icount - ecount) / (double)icount) * 100), + bucket - real_bkt); +} + static int otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr) { @@ -423,8 +448,11 @@ otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr) tim_ring->ring_start_cyc = rsp->timestarted; #endif tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz()); + tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts; tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int); + otx2_tim_calibrate_start_tsc(tim_ring); + fail: return rc; } diff --git a/drivers/event/octeontx2/otx2_tim_evdev.h b/drivers/event/octeontx2/otx2_tim_evdev.h index f3fe9697a..56895dcbf 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.h +++ b/drivers/event/octeontx2/otx2_tim_evdev.h @@ -25,6 +25,7 @@ #define TIM_LF_RAS_INT_W1S (0x308) #define TIM_LF_RAS_INT_ENA_W1S (0x310) #define TIM_LF_RAS_INT_ENA_W1C (0x318) +#define TIM_LF_RING_REL (0x400) #define TIM_BUCKET_W1_S_CHUNK_REMAINDER (48) #define TIM_BUCKET_W1_M_CHUNK_REMAINDER ((1ULL << (64 - \ @@ -139,13 +140,15 @@ struct otx2_tim_evdev { struct otx2_tim_ring { uintptr_t base; - struct rte_reciprocal_u64 fast_div; uint16_t nb_chunk_slots; uint32_t nb_bkts; + uint64_t last_updt_cyc; uint64_t ring_start_cyc; + uint64_t tck_int; + uint64_t tot_int; struct otx2_tim_bkt *bkt; struct rte_mempool *chunk_pool; - uint64_t tck_int; + struct rte_reciprocal_u64 fast_div; rte_atomic64_t arm_cnt; uint8_t prod_type_sp; uint8_t enable_stats; diff --git a/drivers/event/octeontx2/otx2_tim_worker.c b/drivers/event/octeontx2/otx2_tim_worker.c index feba61cd4..104674c79 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.c +++ b/drivers/event/octeontx2/otx2_tim_worker.c @@ -38,6 +38,23 @@ tim_format_event(const struct rte_event_timer * const tim, entry->wqe = tim->ev.u64; } +static inline void +tim_sync_start_cyc(struct otx2_tim_ring *tim_ring) +{ + uint64_t cur_cyc = rte_rdtsc(); + uint32_t real_bkt; + + if (cur_cyc - tim_ring->last_updt_cyc > tim_ring->tot_int) { + real_bkt = otx2_read64(tim_ring->base + TIM_LF_RING_REL) >> 44; + cur_cyc = rte_rdtsc(); + + tim_ring->ring_start_cyc = cur_cyc - + (real_bkt * tim_ring->tck_int); + tim_ring->last_updt_cyc = cur_cyc; + } + +} + static __rte_always_inline uint16_t tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim, @@ -49,6 +66,7 @@ tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr, uint16_t index; int ret; + tim_sync_start_cyc(tim_ring); for (index = 0; index < nb_timers; index++) { if (tim_arm_checks(tim_ring, tim[index])) break; @@ -99,6 +117,7 @@ tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr, return 0; } + tim_sync_start_cyc(tim_ring); while (arr_idx < nb_timers) { for (idx = 0; idx < OTX2_TIM_MAX_BURST && (arr_idx < nb_timers); idx++, arr_idx++) { -- 2.17.1
+Cc: stable@dpdk.org
>-----Original Message-----
>From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
>Sent: Friday, November 22, 2019 9:14 PM
>To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Pavan Nikhilesh
>Bhagavatula <pbhagavatula@marvell.com>
>Cc: dev@dpdk.org
>Subject: [dpdk-dev] [PATCH v3 1/5] event/octeontx2: fix TIM HW race
>condition
>
>From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
>Fix HW race condition observed when timeout resolution is low (<5us).
>When HW traverses a given TIM bucket it will clear chunk_remainder,
>but since SW always decreases the chunk_remainder at the start of the
>arm routine it might cause a race where SW updates chunk_remainder
>after HW has cleared it that lead to nasty side effects.
>
>Fixes: 95e4e4ec7469 ("event/octeontx2: add timer arm timeout burst")
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>---
> drivers/event/octeontx2/otx2_tim_worker.h | 141
>+++++++++++++++++++---
> 1 file changed, 124 insertions(+), 17 deletions(-)
>
>diff --git a/drivers/event/octeontx2/otx2_tim_worker.h
>b/drivers/event/octeontx2/otx2_tim_worker.h
>index 50db6543c..c896b5433 100644
>--- a/drivers/event/octeontx2/otx2_tim_worker.h
>+++ b/drivers/event/octeontx2/otx2_tim_worker.h
>@@ -7,6 +7,13 @@
>
> #include "otx2_tim_evdev.h"
>
>+static inline uint8_t
>+tim_bkt_fetch_lock(uint64_t w1)
>+{
>+ return (w1 >> TIM_BUCKET_W1_S_LOCK) &
>+ TIM_BUCKET_W1_M_LOCK;
>+}
>+
> static inline int16_t
> tim_bkt_fetch_rem(uint64_t w1)
> {
>@@ -188,7 +195,6 @@ tim_insert_chunk(struct otx2_tim_bkt * const
>bkt,
> } else {
> bkt->first_chunk = (uintptr_t)chunk;
> }
>-
> return chunk;
> }
>
>@@ -208,11 +214,38 @@ tim_add_entry_sp(struct otx2_tim_ring *
>const tim_ring,
>
> __retry:
> /* Get Bucket sema*/
>- lock_sema = tim_bkt_fetch_sema(bkt);
>+ lock_sema = tim_bkt_fetch_sema_lock(bkt);
>
> /* Bucket related checks. */
>- if (unlikely(tim_bkt_get_hbt(lock_sema)))
>- goto __retry;
>+ if (unlikely(tim_bkt_get_hbt(lock_sema))) {
>+ if (tim_bkt_get_nent(lock_sema) != 0) {
>+ uint64_t hbt_state;
>+#ifdef RTE_ARCH_ARM64
>+ asm volatile(
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbz %[hbt], 33, dne%=
> \n"
>+ " sevl
> \n"
>+ "rty%=: wfe
> \n"
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbnz %[hbt], 33, rty%=
> \n"
>+ "dne%=:
> \n"
>+ : [hbt] "=&r" (hbt_state)
>+ : [w1] "r" ((&bkt->w1))
>+ : "memory"
>+ );
>+#else
>+ do {
>+ hbt_state = __atomic_load_n(&bkt-
>>w1,
>+ __ATOMIC_ACQUIRE);
>+ } while (hbt_state & BIT_ULL(33));
>+#endif
>+
>+ if (!(hbt_state & BIT_ULL(34))) {
>+ tim_bkt_dec_lock(bkt);
>+ goto __retry;
>+ }
>+ }
>+ }
>
> /* Insert the work. */
> rem = tim_bkt_fetch_rem(lock_sema);
>@@ -224,14 +257,15 @@ tim_add_entry_sp(struct otx2_tim_ring *
>const tim_ring,
> chunk = tim_insert_chunk(bkt, tim_ring);
>
> if (unlikely(chunk == NULL)) {
>- tim_bkt_set_rem(bkt, 0);
>+ bkt->chunk_remainder = 0;
>+ tim_bkt_dec_lock(bkt);
> tim->impl_opaque[0] = 0;
> tim->impl_opaque[1] = 0;
> tim->state = RTE_EVENT_TIMER_ERROR;
> return -ENOMEM;
> }
> bkt->current_chunk = (uintptr_t)chunk;
>- tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - 1);
>+ bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
> } else {
> chunk = (struct otx2_tim_ent *)(uintptr_t)bkt-
>>current_chunk;
> chunk += tim_ring->nb_chunk_slots - rem;
>@@ -241,6 +275,7 @@ tim_add_entry_sp(struct otx2_tim_ring * const
>tim_ring,
> *chunk = *pent;
>
> tim_bkt_inc_nent(bkt);
>+ tim_bkt_dec_lock(bkt);
>
> tim->impl_opaque[0] = (uintptr_t)chunk;
> tim->impl_opaque[1] = (uintptr_t)bkt;
>@@ -263,19 +298,60 @@ tim_add_entry_mp(struct otx2_tim_ring *
>const tim_ring,
>
> __retry:
> bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags);
>-
> /* Get Bucket sema*/
> lock_sema = tim_bkt_fetch_sema_lock(bkt);
>
> /* Bucket related checks. */
> if (unlikely(tim_bkt_get_hbt(lock_sema))) {
>- tim_bkt_dec_lock(bkt);
>- goto __retry;
>+ if (tim_bkt_get_nent(lock_sema) != 0) {
>+ uint64_t hbt_state;
>+#ifdef RTE_ARCH_ARM64
>+ asm volatile(
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbz %[hbt], 33, dne%=
> \n"
>+ " sevl
> \n"
>+ "rty%=: wfe
> \n"
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbnz %[hbt], 33, rty%=
> \n"
>+ "dne%=:
> \n"
>+ : [hbt] "=&r" (hbt_state)
>+ : [w1] "r" ((&bkt->w1))
>+ : "memory"
>+ );
>+#else
>+ do {
>+ hbt_state = __atomic_load_n(&bkt-
>>w1,
>+ __ATOMIC_ACQUIRE);
>+ } while (hbt_state & BIT_ULL(33));
>+#endif
>+
>+ if (!(hbt_state & BIT_ULL(34))) {
>+ tim_bkt_dec_lock(bkt);
>+ goto __retry;
>+ }
>+ }
> }
>
> rem = tim_bkt_fetch_rem(lock_sema);
>-
> if (rem < 0) {
>+#ifdef RTE_ARCH_ARM64
>+ asm volatile(
>+ " ldaxrh %w[rem], [%[crem]]
> \n"
>+ " tbz %w[rem], 15, dne%=
> \n"
>+ " sevl
> \n"
>+ "rty%=: wfe
> \n"
>+ " ldaxrh %w[rem], [%[crem]]
> \n"
>+ " tbnz %w[rem], 15, rty%=
> \n"
>+ "dne%=:
> \n"
>+ : [rem] "=&r" (rem)
>+ : [crem] "r" (&bkt->chunk_remainder)
>+ : "memory"
>+ );
>+#else
>+ while (__atomic_load_n(&bkt->chunk_remainder,
>+ __ATOMIC_ACQUIRE) < 0)
>+ ;
>+#endif
> /* Goto diff bucket. */
> tim_bkt_dec_lock(bkt);
> goto __retry;
>@@ -294,17 +370,23 @@ tim_add_entry_mp(struct otx2_tim_ring *
>const tim_ring,
> tim->state = RTE_EVENT_TIMER_ERROR;
> return -ENOMEM;
> }
>- bkt->current_chunk = (uintptr_t)chunk;
>- tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - 1);
>+ *chunk = *pent;
>+ while (tim_bkt_fetch_lock(lock_sema) !=
>+ (-tim_bkt_fetch_rem(lock_sema)))
>+ lock_sema = __atomic_load_n(&bkt->w1,
>__ATOMIC_ACQUIRE);
>+
>+ bkt->current_chunk = (uintptr_t)chunk;
>+ __atomic_store_n(&bkt->chunk_remainder,
>+ tim_ring->nb_chunk_slots - 1,
>__ATOMIC_RELEASE);
> } else {
>- chunk = (struct otx2_tim_ent *)(uintptr_t)bkt-
>>current_chunk;
>+ chunk = (struct otx2_tim_ent *)bkt->current_chunk;
> chunk += tim_ring->nb_chunk_slots - rem;
>+ *chunk = *pent;
> }
>
> /* Copy work entry. */
>- *chunk = *pent;
>- tim_bkt_dec_lock(bkt);
> tim_bkt_inc_nent(bkt);
>+ tim_bkt_dec_lock(bkt);
> tim->impl_opaque[0] = (uintptr_t)chunk;
> tim->impl_opaque[1] = (uintptr_t)bkt;
> tim->state = RTE_EVENT_TIMER_ARMED;
>@@ -360,8 +442,33 @@ tim_add_entry_brst(struct otx2_tim_ring *
>const tim_ring,
>
> /* Bucket related checks. */
> if (unlikely(tim_bkt_get_hbt(lock_sema))) {
>- tim_bkt_dec_lock(bkt);
>- goto __retry;
>+ if (tim_bkt_get_nent(lock_sema) != 0) {
>+ uint64_t hbt_state;
>+#ifdef RTE_ARCH_ARM64
>+ asm volatile(
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbz %[hbt], 33, dne%=
> \n"
>+ " sevl
> \n"
>+ "rty%=: wfe
> \n"
>+ " ldaxr %[hbt], [%[w1]]
> \n"
>+ " tbnz %[hbt], 33, rty%=
> \n"
>+ "dne%=:
> \n"
>+ : [hbt] "=&r" (hbt_state)
>+ : [w1] "r" ((&bkt->w1))
>+ : "memory"
>+ );
>+#else
>+ do {
>+ hbt_state = __atomic_load_n(&bkt-
>>w1,
>+ __ATOMIC_ACQUIRE);
>+ } while (hbt_state & BIT_ULL(33));
>+#endif
>+
>+ if (!(hbt_state & BIT_ULL(34))) {
>+ tim_bkt_dec_lock(bkt);
>+ goto __retry;
>+ }
>+ }
> }
>
> chunk_remainder = tim_bkt_fetch_rem(lock_sema);
>--
>2.17.1
On Sat, Nov 23, 2019 at 1:07 AM Pavan Nikhilesh Bhagavatula
<pbhagavatula@marvell.com> wrote:
>
> +Cc: stable@dpdk.org
>
> >-----Original Message-----
> >From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> >Sent: Friday, November 22, 2019 9:14 PM
> >To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Pavan Nikhilesh
> >Bhagavatula <pbhagavatula@marvell.com>
> >Cc: dev@dpdk.org
> >Subject: [dpdk-dev] [PATCH v3 1/5] event/octeontx2: fix TIM HW race
> >condition
> >
> >From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> >Fix HW race condition observed when timeout resolution is low (<5us).
> >When HW traverses a given TIM bucket it will clear chunk_remainder,
> >but since SW always decreases the chunk_remainder at the start of the
> >arm routine it might cause a race where SW updates chunk_remainder
> >after HW has cleared it that lead to nasty side effects.
> >
> >Fixes: 95e4e4ec7469 ("event/octeontx2: add timer arm timeout burst")
> >
> >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series applied to dpdk-next-eventdev/master. Thanks.