If cache is enabled, objects will be retrieved/put from/to cache, subsequently from/to the common pool. Now the debug stats calculate the objects retrieved/put from/to cache and pool together, it is better to distinguish the data number from local cache and common pool. Signed-off-by: Joyce Kong <joyce.kong@arm.com> --- lib/librte_mempool/rte_mempool.c | 12 ++++++ lib/librte_mempool/rte_mempool.h | 64 ++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index afb1239c8..9cb69367a 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -1244,8 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { sum.put_bulk += mp->stats[lcore_id].put_bulk; sum.put_objs += mp->stats[lcore_id].put_objs; + sum.put_objs_cache += mp->stats[lcore_id].put_objs_cache; + sum.put_objs_pool += mp->stats[lcore_id].put_objs_pool; + sum.put_objs_flush += mp->stats[lcore_id].put_objs_flush; sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; sum.get_success_objs += mp->stats[lcore_id].get_success_objs; + sum.get_success_objs_cache += mp->stats[lcore_id].get_success_objs_cache; + sum.get_success_objs_pool += mp->stats[lcore_id].get_success_objs_pool; + sum.get_success_objs_refill += mp->stats[lcore_id].get_success_objs_refill; sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs; sum.get_success_blks += mp->stats[lcore_id].get_success_blks; @@ -1254,8 +1260,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " put_objs_cache=%"PRIu64"\n", sum.put_objs_cache); + fprintf(f, " put_objs_pool=%"PRIu64"\n", sum.put_objs_pool); + fprintf(f, " put_objs_flush=%"PRIu64"\n", sum.put_objs_flush); fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); + fprintf(f, " get_success_objs_cache=%"PRIu64"\n", sum.get_success_objs_cache); + fprintf(f, " get_success_objs_pool=%"PRIu64"\n", sum.get_success_objs_pool); + fprintf(f, " get_success_objs_refill=%"PRIu64"\n", sum.get_success_objs_refill); fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs); if (info.contig_block_size > 0) { diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index c551cf733..29d80d97e 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -66,12 +66,18 @@ extern "C" { * A structure that stores the mempool statistics (per-lcore). */ struct rte_mempool_debug_stats { - uint64_t put_bulk; /**< Number of puts. */ - uint64_t put_objs; /**< Number of objects successfully put. */ - uint64_t get_success_bulk; /**< Successful allocation number. */ - uint64_t get_success_objs; /**< Objects successfully allocated. */ - uint64_t get_fail_bulk; /**< Failed allocation number. */ - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t put_objs_cache; /**< Number of objects successfully put to cache. */ + uint64_t put_objs_pool; /**< Number of objects successfully put to pool. */ + uint64_t put_objs_flush; /**< Number of flushing objects from cache to pool. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_success_objs_cache; /**< Objects successfully allocated from cache. */ + uint64_t get_success_objs_pool; /**< Objects successfully allocated from pool. */ + uint64_t get_success_objs_refill; /**< Number of refilling objects from pool to cache. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ /** Successful allocation number of contiguous blocks. */ uint64_t get_success_blks; /** Failed allocation number of contiguous blocks. */ @@ -270,22 +276,34 @@ struct rte_mempool { * Number to add to the object-oriented statistics. */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ - unsigned __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ mp->stats[__lcore_id].name##_objs += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ - } \ - } while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ - unsigned int __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ + mp->stats[__lcore_id].name##_bulk += 1; \ + } \ + } while (0) +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) \ + mp->stats[__lcore_id].name1##_objs_##name2 += n; \ + } while (0) +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, name2, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) \ + mp->stats[__lcore_id].name1##_objs_##name2 -= n; \ + } while (0) +#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ + unsigned int __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ mp->stats[__lcore_id].name##_blks += n; \ mp->stats[__lcore_id].name##_bulk += 1; \ - } \ + } \ } while (0) #else -#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) +#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while (0) +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do {} while (0) +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, nmae2, n) do {} while (0) #define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) #endif @@ -1305,10 +1323,13 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, /* Add elements back into the cache */ rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); - + __MEMPOOL_OBJS_STAT_ADD(mp, put, cache, n); cache->len += n; if (cache->len >= cache->flushthresh) { + __MEMPOOL_OBJS_STAT_SUB(mp, put, cache, cache->len - cache->size); + __MEMPOOL_OBJS_STAT_ADD(mp, put, pool, cache->len - cache->size); + __MEMPOOL_OBJS_STAT_ADD(mp, put, flush, 1); rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], cache->len - cache->size); cache->len = cache->size; @@ -1318,6 +1339,7 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, ring_enqueue: + __MEMPOOL_OBJS_STAT_ADD(mp, put, pool, n); /* push remaining objects in ring */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0) @@ -1437,6 +1459,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, goto ring_dequeue; } + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, refill, 1); cache->len += req; } @@ -1447,6 +1470,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; __MEMPOOL_STAT_ADD(mp, get_success, n); + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, cache, n); return 0; @@ -1457,8 +1481,10 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, if (ret < 0) __MEMPOOL_STAT_ADD(mp, get_fail, n); - else + else { __MEMPOOL_STAT_ADD(mp, get_success, n); + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, pool, n); + } return ret; } -- 2.30.0
Hi Joyce, On Thu, Mar 18, 2021 at 07:20:22PM +0800, Joyce Kong wrote: > If cache is enabled, objects will be retrieved/put from/to cache, > subsequently from/to the common pool. Now the debug stats calculate > the objects retrieved/put from/to cache and pool together, it is > better to distinguish the data number from local cache and common > pool. This is indeed a very useful information, thanks for proposing this. Please see some comments below. > Signed-off-by: Joyce Kong <joyce.kong@arm.com> > --- > lib/librte_mempool/rte_mempool.c | 12 ++++++ > lib/librte_mempool/rte_mempool.h | 64 ++++++++++++++++++++++---------- > 2 files changed, 57 insertions(+), 19 deletions(-) > > diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c > index afb1239c8..9cb69367a 100644 > --- a/lib/librte_mempool/rte_mempool.c > +++ b/lib/librte_mempool/rte_mempool.c > @@ -1244,8 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { > sum.put_bulk += mp->stats[lcore_id].put_bulk; > sum.put_objs += mp->stats[lcore_id].put_objs; > + sum.put_objs_cache += mp->stats[lcore_id].put_objs_cache; > + sum.put_objs_pool += mp->stats[lcore_id].put_objs_pool; > + sum.put_objs_flush += mp->stats[lcore_id].put_objs_flush; > sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; > sum.get_success_objs += mp->stats[lcore_id].get_success_objs; > + sum.get_success_objs_cache += mp->stats[lcore_id].get_success_objs_cache; > + sum.get_success_objs_pool += mp->stats[lcore_id].get_success_objs_pool; > + sum.get_success_objs_refill += mp->stats[lcore_id].get_success_objs_refill; > sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; > sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs; > sum.get_success_blks += mp->stats[lcore_id].get_success_blks; > @@ -1254,8 +1260,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > fprintf(f, " stats:\n"); > fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); > fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); > + fprintf(f, " put_objs_cache=%"PRIu64"\n", sum.put_objs_cache); > + fprintf(f, " put_objs_pool=%"PRIu64"\n", sum.put_objs_pool); > + fprintf(f, " put_objs_flush=%"PRIu64"\n", sum.put_objs_flush); > fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); > fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); > + fprintf(f, " get_success_objs_cache=%"PRIu64"\n", sum.get_success_objs_cache); > + fprintf(f, " get_success_objs_pool=%"PRIu64"\n", sum.get_success_objs_pool); > + fprintf(f, " get_success_objs_refill=%"PRIu64"\n", sum.get_success_objs_refill); > fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); > fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs); > if (info.contig_block_size > 0) { > diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h > index c551cf733..29d80d97e 100644 > --- a/lib/librte_mempool/rte_mempool.h > +++ b/lib/librte_mempool/rte_mempool.h > @@ -66,12 +66,18 @@ extern "C" { > * A structure that stores the mempool statistics (per-lcore). > */ > struct rte_mempool_debug_stats { > - uint64_t put_bulk; /**< Number of puts. */ > - uint64_t put_objs; /**< Number of objects successfully put. */ > - uint64_t get_success_bulk; /**< Successful allocation number. */ > - uint64_t get_success_objs; /**< Objects successfully allocated. */ > - uint64_t get_fail_bulk; /**< Failed allocation number. */ > - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > + uint64_t put_bulk; /**< Number of puts. */ > + uint64_t put_objs; /**< Number of objects successfully put. */ > + uint64_t put_objs_cache; /**< Number of objects successfully put to cache. */ > + uint64_t put_objs_pool; /**< Number of objects successfully put to pool. */ > + uint64_t put_objs_flush; /**< Number of flushing objects from cache to pool. */ > + uint64_t get_success_bulk; /**< Successful allocation number. */ > + uint64_t get_success_objs; /**< Objects successfully allocated. */ > + uint64_t get_success_objs_cache; /**< Objects successfully allocated from cache. */ > + uint64_t get_success_objs_pool; /**< Objects successfully allocated from pool. */ > + uint64_t get_success_objs_refill; /**< Number of refilling objects from pool to cache. */ > + uint64_t get_fail_bulk; /**< Failed allocation number. */ > + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ What about having instead the following new stats: - put_common_pool_bulk: number of bulks enqueued in common pool - put_common_pool_objs: number of objects enqueued in common pool - get_common_pool_bulk: number of bulks dequeued from common pool - get_common_pool_objs: number of objects dequeued from common pool It looks easier to me to understand, compared to flush/refill. Especially, having 'objs' in the name makes me think that it counts a number of objects, but it's not the case. > /** Successful allocation number of contiguous blocks. */ > uint64_t get_success_blks; > /** Failed allocation number of contiguous blocks. */ > @@ -270,22 +276,34 @@ struct rte_mempool { > * Number to add to the object-oriented statistics. > */ > #ifdef RTE_LIBRTE_MEMPOOL_DEBUG > -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ > - unsigned __lcore_id = rte_lcore_id(); \ > - if (__lcore_id < RTE_MAX_LCORE) { \ > +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ > + unsigned __lcore_id = rte_lcore_id(); \ > + if (__lcore_id < RTE_MAX_LCORE) { \ > mp->stats[__lcore_id].name##_objs += n; \ > - mp->stats[__lcore_id].name##_bulk += 1; \ > - } \ > - } while(0) > -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ > - unsigned int __lcore_id = rte_lcore_id(); \ > - if (__lcore_id < RTE_MAX_LCORE) { \ > + mp->stats[__lcore_id].name##_bulk += 1; \ > + } \ > + } while (0) > +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do { \ > + unsigned __lcore_id = rte_lcore_id(); \ > + if (__lcore_id < RTE_MAX_LCORE) \ > + mp->stats[__lcore_id].name1##_objs_##name2 += n; \ > + } while (0) > +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, name2, n) do { \ > + unsigned __lcore_id = rte_lcore_id(); \ > + if (__lcore_id < RTE_MAX_LCORE) \ > + mp->stats[__lcore_id].name1##_objs_##name2 -= n; \ > + } while (0) > +#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ > + unsigned int __lcore_id = rte_lcore_id(); \ > + if (__lcore_id < RTE_MAX_LCORE) { \ > mp->stats[__lcore_id].name##_blks += n; \ > mp->stats[__lcore_id].name##_bulk += 1; \ > - } \ > + } \ > } while (0) There are too many stats macros. I think that the original __MEMPOOL_STAT_ADD() macro can be reused if using the stats name I'm suggesting. Else, if not using names ending with _objs and _bulks, it would be better to rework the macro to something more generic, like this: #define __MEMPOOL_STAT_ADD(mp, name, n) do { \ unsigned __lcore_id = rte_lcore_id(); \ if (__lcore_id < RTE_MAX_LCORE) \ mp->stats[__lcore_id].name += n; \ } while (0) In this case, it could replace both __MEMPOOL_STAT_ADD() and __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(), and could be in a separate patch, before this one. It would replace the 6 macro calls by 12, but would be clearer because one can see the real field name. Eventually the macro could take the lcore_id as a parameter. > #else > -#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) > +#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while (0) > +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do {} while (0) > +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, nmae2, n) do {} while (0) > #define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) > #endif > > @@ -1305,10 +1323,13 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, > > /* Add elements back into the cache */ > rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); > - > + __MEMPOOL_OBJS_STAT_ADD(mp, put, cache, n); > cache->len += n; > > if (cache->len >= cache->flushthresh) { > + __MEMPOOL_OBJS_STAT_SUB(mp, put, cache, cache->len - cache->size); I don't think it is a good idea to decrease a statistic counter. If using put_common_pool_bulk/put_common_pool_objs, I think the code could go in rte_mempool_ops_enqueue_bulk() (and in rte_mempool_ops_dequeue_bulk() for 'get' stats). > + __MEMPOOL_OBJS_STAT_ADD(mp, put, pool, cache->len - cache->size); > + __MEMPOOL_OBJS_STAT_ADD(mp, put, flush, 1); > rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], > cache->len - cache->size); > cache->len = cache->size; > @@ -1318,6 +1339,7 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, > > ring_enqueue: > > + __MEMPOOL_OBJS_STAT_ADD(mp, put, pool, n); > /* push remaining objects in ring */ > #ifdef RTE_LIBRTE_MEMPOOL_DEBUG > if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0) > @@ -1437,6 +1459,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > goto ring_dequeue; > } > > + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, refill, 1); > cache->len += req; > } > > @@ -1447,6 +1470,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > cache->len -= n; > > __MEMPOOL_STAT_ADD(mp, get_success, n); > + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, cache, n); > > return 0; > > @@ -1457,8 +1481,10 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > > if (ret < 0) > __MEMPOOL_STAT_ADD(mp, get_fail, n); > - else > + else { > __MEMPOOL_STAT_ADD(mp, get_success, n); > + __MEMPOOL_OBJS_STAT_ADD(mp, get_success, pool, n); > + } > > return ret; > } > -- > 2.30.0 >
- Add debug counters for objects put/get to/from the cache and the common pool. - Make __MEMPOOL_STAT_ADD() more generic --- v3: - Add a patch to make stat add macro generic - Remove other stat add/subtract macros - Rename counters for better understanding - Add put/get cache bulk counters v2: - Fix typo in the commit message --- Dharmik Thakkar (1): lib/mempool: make stats macro generic Joyce Kong (1): lib/mempool: distinguish debug counters from cache and pool lib/librte_mempool/rte_mempool.c | 24 +++++++++++ lib/librte_mempool/rte_mempool.h | 71 ++++++++++++++++++++------------ 2 files changed, 68 insertions(+), 27 deletions(-) -- 2.17.1
Make __MEMPOOL_STAT_ADD macro more generic and delete __MEMPOOL_CONTIG_BLOCKS_STAT_ADD macro Suggested-by: Olivier Matz <olivier.matz@6wind.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- lib/librte_mempool/rte_mempool.h | 34 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index c551cf733acf..848a19226149 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -273,20 +273,11 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do { \ unsigned __lcore_id = rte_lcore_id(); \ if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_objs += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ + mp->stats[__lcore_id].name += n; \ } \ } while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ - unsigned int __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_blks += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ - } \ - } while (0) #else #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) #endif /** @@ -1288,7 +1279,8 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, void **cache_objs; /* increment stat now, adding in mempool always success */ - __MEMPOOL_STAT_ADD(mp, put, n); + __MEMPOOL_STAT_ADD(mp, put_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_objs, n); /* No cache provided or if put would overflow mem allocated for cache */ if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) @@ -1446,7 +1438,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; - __MEMPOOL_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); return 0; @@ -1455,10 +1448,13 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, /* get remaining objects from ring */ ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n); - if (ret < 0) - __MEMPOOL_STAT_ADD(mp, get_fail, n); - else - __MEMPOOL_STAT_ADD(mp, get_success, n); + if (ret < 0) { + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); + } else { + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } return ret; } @@ -1581,11 +1577,13 @@ rte_mempool_get_contig_blocks(struct rte_mempool *mp, ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n); if (ret == 0) { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_blks, n); __mempool_contig_blocks_check_cookies(mp, first_obj_table, n, 1); } else { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_fail, n); + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_blks, n); } rte_mempool_trace_get_contig_blocks(mp, first_obj_table, n); -- 2.17.1
From: Joyce Kong <joyce.kong@arm.com> If cache is enabled, objects will be retrieved/put from/to cache, subsequently from/to the common pool. Now the debug stats calculate the objects retrieved/put from/to cache and pool together, it is better to distinguish them. Signed-off-by: Joyce Kong <joyce.kong@arm.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++ lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index afb1239c8d48..339f14455624 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { sum.put_bulk += mp->stats[lcore_id].put_bulk; sum.put_objs += mp->stats[lcore_id].put_objs; + sum.put_common_pool_bulk += + mp->stats[lcore_id].put_common_pool_bulk; + sum.put_common_pool_objs += + mp->stats[lcore_id].put_common_pool_objs; + sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk; + sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs; + sum.get_common_pool_bulk += + mp->stats[lcore_id].get_common_pool_bulk; + sum.get_common_pool_objs += + mp->stats[lcore_id].get_common_pool_objs; + sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk; + sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs; sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; sum.get_success_objs += mp->stats[lcore_id].get_success_objs; sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", + sum.put_common_pool_bulk); + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", + sum.put_common_pool_objs); + fprintf(f, " put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk); + fprintf(f, " put_cache_objs=%"PRIu64"\n", sum.put_cache_objs); + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", + sum.get_common_pool_bulk); + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", + sum.get_common_pool_objs); + fprintf(f, " get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk); + fprintf(f, " get_cache_objs=%"PRIu64"\n", sum.get_cache_objs); fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 848a19226149..0959f8a3f367 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -66,12 +66,20 @@ extern "C" { * A structure that stores the mempool statistics (per-lcore). */ struct rte_mempool_debug_stats { - uint64_t put_bulk; /**< Number of puts. */ - uint64_t put_objs; /**< Number of objects successfully put. */ - uint64_t get_success_bulk; /**< Successful allocation number. */ - uint64_t get_success_objs; /**< Objects successfully allocated. */ - uint64_t get_fail_bulk; /**< Failed allocation number. */ - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ + uint64_t put_cache_bulk; /**< Number of bulks enqueued in cache. */ + uint64_t put_cache_objs; /**< Number of objects enqueued in cache. */ + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ + uint64_t get_cache_bulk; /**< Number of bulks dequeued from cache. */ + uint64_t get_cache_objs; /**< Number of objects dequeued from cache. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ /** Successful allocation number of contiguous blocks. */ uint64_t get_success_blks; /** Failed allocation number of contiguous blocks. */ @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) { struct rte_mempool_ops *ops; + int ret; rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); - return ops->dequeue(mp, obj_table, n); + ret = ops->dequeue(mp, obj_table, n); + if (ret == 0) { + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } + return ret; } /** @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, { struct rte_mempool_ops *ops; + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); return ops->enqueue(mp, obj_table, n); @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, /* Add elements back into the cache */ rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); - cache->len += n; + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); + if (cache->len >= cache->flushthresh) { + __MEMPOOL_STAT_ADD(mp, put_cache_objs, + n - (cache->len - cache->size)); rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], cache->len - cache->size); cache->len = cache->size; - } + } else + __MEMPOOL_STAT_ADD(mp, put_cache_objs, n); return; @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_cache_objs, n); return 0; @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, if (ret < 0) { __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); - } else { - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); } return ret; -- 2.17.1
Hi Olivier,
Apologies for the delayed response!
I’m stepping in since Joyce is on vacation.
I have updated the patch with the required changes.
Thank you!
> On Apr 7, 2021, at 9:28 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
On Mon, Apr 19, 2021 at 07:07:59PM -0500, Dharmik Thakkar wrote:
> Make __MEMPOOL_STAT_ADD macro more generic and delete
> __MEMPOOL_CONTIG_BLOCKS_STAT_ADD macro
>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
Hi Dharmik, Please see some comments below. On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote: > From: Joyce Kong <joyce.kong@arm.com> > > If cache is enabled, objects will be retrieved/put from/to cache, > subsequently from/to the common pool. Now the debug stats calculate > the objects retrieved/put from/to cache and pool together, it is > better to distinguish them. > > Signed-off-by: Joyce Kong <joyce.kong@arm.com> > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++ > lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++---------- > 2 files changed, 57 insertions(+), 14 deletions(-) > > diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c > index afb1239c8d48..339f14455624 100644 > --- a/lib/librte_mempool/rte_mempool.c > +++ b/lib/librte_mempool/rte_mempool.c > @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { > sum.put_bulk += mp->stats[lcore_id].put_bulk; > sum.put_objs += mp->stats[lcore_id].put_objs; > + sum.put_common_pool_bulk += > + mp->stats[lcore_id].put_common_pool_bulk; > + sum.put_common_pool_objs += > + mp->stats[lcore_id].put_common_pool_objs; > + sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk; > + sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs; > + sum.get_common_pool_bulk += > + mp->stats[lcore_id].get_common_pool_bulk; > + sum.get_common_pool_objs += > + mp->stats[lcore_id].get_common_pool_objs; > + sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk; > + sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs; > sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; > sum.get_success_objs += mp->stats[lcore_id].get_success_objs; > sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; > @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > fprintf(f, " stats:\n"); > fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); > fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); > + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", > + sum.put_common_pool_bulk); > + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", > + sum.put_common_pool_objs); > + fprintf(f, " put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk); > + fprintf(f, " put_cache_objs=%"PRIu64"\n", sum.put_cache_objs); > + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", > + sum.get_common_pool_bulk); > + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", > + sum.get_common_pool_objs); > + fprintf(f, " get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk); > + fprintf(f, " get_cache_objs=%"PRIu64"\n", sum.get_cache_objs); > fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); > fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); > fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); > diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h > index 848a19226149..0959f8a3f367 100644 > --- a/lib/librte_mempool/rte_mempool.h > +++ b/lib/librte_mempool/rte_mempool.h > @@ -66,12 +66,20 @@ extern "C" { > * A structure that stores the mempool statistics (per-lcore). > */ > struct rte_mempool_debug_stats { > - uint64_t put_bulk; /**< Number of puts. */ > - uint64_t put_objs; /**< Number of objects successfully put. */ > - uint64_t get_success_bulk; /**< Successful allocation number. */ > - uint64_t get_success_objs; /**< Objects successfully allocated. */ > - uint64_t get_fail_bulk; /**< Failed allocation number. */ > - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > + uint64_t put_bulk; /**< Number of puts. */ > + uint64_t put_objs; /**< Number of objects successfully put. */ > + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ > + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ > + uint64_t put_cache_bulk; /**< Number of bulks enqueued in cache. */ > + uint64_t put_cache_objs; /**< Number of objects enqueued in cache. */ > + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ > + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ > + uint64_t get_cache_bulk; /**< Number of bulks dequeued from cache. */ > + uint64_t get_cache_objs; /**< Number of objects dequeued from cache. */ > + uint64_t get_success_bulk; /**< Successful allocation number. */ > + uint64_t get_success_objs; /**< Objects successfully allocated. */ > + uint64_t get_fail_bulk; /**< Failed allocation number. */ > + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ I missed it the first time, but this changes the size of the rte_mempool_debug_stats structure. I think we don't care about this ABI breakage because this structure is only defined if RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc. About the field themselves, I'm not certain that there is an added value to have stats for cache gets and puts. My feeling is that the important stat to monitor is the access to common pool, because it is the one that highlights a possible performance impact (contention). The cache stats are more or less equal to "success + fail - common". Moreover, it will simplify the patch and avoid risks of mistakes. What do you think? > /** Successful allocation number of contiguous blocks. */ > uint64_t get_success_blks; > /** Failed allocation number of contiguous blocks. */ > @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, > void **obj_table, unsigned n) > { > struct rte_mempool_ops *ops; > + int ret; > > rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); > ops = rte_mempool_get_ops(mp->ops_index); > - return ops->dequeue(mp, obj_table, n); > + ret = ops->dequeue(mp, obj_table, n); > + if (ret == 0) { > + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); > + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > + } > + return ret; > } > > /** > @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, > { > struct rte_mempool_ops *ops; > > + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); > rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); > ops = rte_mempool_get_ops(mp->ops_index); > return ops->enqueue(mp, obj_table, n); > @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, > > /* Add elements back into the cache */ > rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); > - > cache->len += n; > > + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); > + > if (cache->len >= cache->flushthresh) { > + __MEMPOOL_STAT_ADD(mp, put_cache_objs, > + n - (cache->len - cache->size)); > rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], > cache->len - cache->size); > cache->len = cache->size; > - } > + } else > + __MEMPOOL_STAT_ADD(mp, put_cache_objs, n); > In case we keep cache stats, I'd add {} after the else to be consistent with the if(). > return; > > @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > > cache->len -= n; > > - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > + __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_cache_objs, n); In case we keep cache stats, I don't think we should remove get_success stats increment. Else, the success stats will never be incremented when retrieving objects from the cache. > > return 0; > > @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > if (ret < 0) { > __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); > __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); > - } else { > - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > } > > return ret; > -- > 2.17.1 >
Hi Olivier, Thank you for your comments! > On Apr 21, 2021, at 11:29 AM, Olivier Matz <olivier.matz@6wind.com> wrote: > > Hi Dharmik, > > Please see some comments below. > > On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote: >> From: Joyce Kong <joyce.kong@arm.com> >> >> If cache is enabled, objects will be retrieved/put from/to cache, >> subsequently from/to the common pool. Now the debug stats calculate >> the objects retrieved/put from/to cache and pool together, it is >> better to distinguish them. >> >> Signed-off-by: Joyce Kong <joyce.kong@arm.com> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> >> --- >> lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++ >> lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++---------- >> 2 files changed, 57 insertions(+), 14 deletions(-) >> >> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c >> index afb1239c8d48..339f14455624 100644 >> --- a/lib/librte_mempool/rte_mempool.c >> +++ b/lib/librte_mempool/rte_mempool.c >> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) >> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { >> sum.put_bulk += mp->stats[lcore_id].put_bulk; >> sum.put_objs += mp->stats[lcore_id].put_objs; >> + sum.put_common_pool_bulk += >> + mp->stats[lcore_id].put_common_pool_bulk; >> + sum.put_common_pool_objs += >> + mp->stats[lcore_id].put_common_pool_objs; >> + sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk; >> + sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs; >> + sum.get_common_pool_bulk += >> + mp->stats[lcore_id].get_common_pool_bulk; >> + sum.get_common_pool_objs += >> + mp->stats[lcore_id].get_common_pool_objs; >> + sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk; >> + sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs; >> sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; >> sum.get_success_objs += mp->stats[lcore_id].get_success_objs; >> sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; >> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) >> fprintf(f, " stats:\n"); >> fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); >> fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); >> + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", >> + sum.put_common_pool_bulk); >> + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", >> + sum.put_common_pool_objs); >> + fprintf(f, " put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk); >> + fprintf(f, " put_cache_objs=%"PRIu64"\n", sum.put_cache_objs); >> + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", >> + sum.get_common_pool_bulk); >> + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", >> + sum.get_common_pool_objs); >> + fprintf(f, " get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk); >> + fprintf(f, " get_cache_objs=%"PRIu64"\n", sum.get_cache_objs); >> fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); >> fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); >> fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); >> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h >> index 848a19226149..0959f8a3f367 100644 >> --- a/lib/librte_mempool/rte_mempool.h >> +++ b/lib/librte_mempool/rte_mempool.h >> @@ -66,12 +66,20 @@ extern "C" { >> * A structure that stores the mempool statistics (per-lcore). >> */ >> struct rte_mempool_debug_stats { >> - uint64_t put_bulk; /**< Number of puts. */ >> - uint64_t put_objs; /**< Number of objects successfully put. */ >> - uint64_t get_success_bulk; /**< Successful allocation number. */ >> - uint64_t get_success_objs; /**< Objects successfully allocated. */ >> - uint64_t get_fail_bulk; /**< Failed allocation number. */ >> - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ >> + uint64_t put_bulk; /**< Number of puts. */ >> + uint64_t put_objs; /**< Number of objects successfully put. */ >> + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ >> + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ >> + uint64_t put_cache_bulk; /**< Number of bulks enqueued in cache. */ >> + uint64_t put_cache_objs; /**< Number of objects enqueued in cache. */ >> + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ >> + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ >> + uint64_t get_cache_bulk; /**< Number of bulks dequeued from cache. */ >> + uint64_t get_cache_objs; /**< Number of objects dequeued from cache. */ >> + uint64_t get_success_bulk; /**< Successful allocation number. */ >> + uint64_t get_success_objs; /**< Objects successfully allocated. */ >> + uint64_t get_fail_bulk; /**< Failed allocation number. */ >> + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > > I missed it the first time, but this changes the size of the > rte_mempool_debug_stats structure. I think we don't care about this ABI > breakage because this structure is only defined if > RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc. Agreed, thank you! > > About the field themselves, I'm not certain that there is an added value > to have stats for cache gets and puts. My feeling is that the important > stat to monitor is the access to common pool, because it is the one that > highlights a possible performance impact (contention). The cache stats > are more or less equal to "success + fail - common". Moreover, it will > simplify the patch and avoid risks of mistakes. > > What do you think? Yes, I think the cache stats can be removed. Also, please correct me if I’m wrong; but, in my understanding, the cache stats are equal to “success - common”. Is adding “fail” required? > >> /** Successful allocation number of contiguous blocks. */ >> uint64_t get_success_blks; >> /** Failed allocation number of contiguous blocks. */ >> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, >> void **obj_table, unsigned n) >> { >> struct rte_mempool_ops *ops; >> + int ret; >> >> rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); >> ops = rte_mempool_get_ops(mp->ops_index); >> - return ops->dequeue(mp, obj_table, n); >> + ret = ops->dequeue(mp, obj_table, n); >> + if (ret == 0) { >> + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); >> + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> + } >> + return ret; >> } >> >> /** >> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, >> { >> struct rte_mempool_ops *ops; >> >> + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); >> rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); >> ops = rte_mempool_get_ops(mp->ops_index); >> return ops->enqueue(mp, obj_table, n); >> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, >> >> /* Add elements back into the cache */ >> rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); >> - >> cache->len += n; >> >> + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); >> + >> if (cache->len >= cache->flushthresh) { >> + __MEMPOOL_STAT_ADD(mp, put_cache_objs, >> + n - (cache->len - cache->size)); >> rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], >> cache->len - cache->size); >> cache->len = cache->size; >> - } >> + } else >> + __MEMPOOL_STAT_ADD(mp, put_cache_objs, n); >> > > In case we keep cache stats, I'd add {} after the else to be consistent > with the if(). Ack. > >> return; >> >> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, >> >> cache->len -= n; >> >> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> + __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_cache_objs, n); > > In case we keep cache stats, I don't think we should remove get_success > stats increment. Else, the success stats will never be incremented when > retrieving objects from the cache. > Good catch. Thanks! > >> >> return 0; >> >> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, >> if (ret < 0) { >> __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); >> __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); >> - } else { >> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> } >> >> return ret; >> -- >> 2.17.1
<snip> > >> diff --git a/lib/librte_mempool/rte_mempool.h > >> b/lib/librte_mempool/rte_mempool.h > >> index 848a19226149..0959f8a3f367 100644 > >> --- a/lib/librte_mempool/rte_mempool.h > >> +++ b/lib/librte_mempool/rte_mempool.h > >> @@ -66,12 +66,20 @@ extern "C" { > >> * A structure that stores the mempool statistics (per-lcore). > >> */ > >> struct rte_mempool_debug_stats { > >> -uint64_t put_bulk; /**< Number of puts. */ > >> -uint64_t put_objs; /**< Number of objects successfully put. */ > >> -uint64_t get_success_bulk; /**< Successful allocation number. */ > >> -uint64_t get_success_objs; /**< Objects successfully allocated. */ > >> -uint64_t get_fail_bulk; /**< Failed allocation number. */ > >> -uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > >> +uint64_t put_bulk; /**< Number of puts. */ uint64_t put_objs; /**< > >> +Number of objects successfully put. */ uint64_t > >> +put_common_pool_bulk; /**< Number of bulks enqueued in common > pool. > >> +*/ uint64_t put_common_pool_objs; /**< Number of objects enqueued > >> +in common pool. */ uint64_t put_cache_bulk; /**< Number of bulks > >> +enqueued in cache. */ uint64_t put_cache_objs; /**< Number of objects > enqueued in cache. */ > >> +uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from > common pool. */ > >> +uint64_t get_common_pool_objs; /**< Number of objects dequeued from > >> +common pool. */ uint64_t get_cache_bulk; /**< Number of bulks > >> +dequeued from cache. */ uint64_t get_cache_objs; /**< Number of > >> +objects dequeued from cache. */ uint64_t get_success_bulk; /**< > >> +Successful allocation number. */ uint64_t get_success_objs; /**< > >> +Objects successfully allocated. */ uint64_t get_fail_bulk; /**< > >> +Failed allocation number. */ uint64_t get_fail_objs; /**< Objects > >> +that failed to be allocated. */ > > > > I missed it the first time, but this changes the size of the > > rte_mempool_debug_stats structure. I think we don't care about this > > ABI breakage because this structure is only defined if > > RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc. > > Agreed, thank you! > > > > > About the field themselves, I'm not certain that there is an added > > value to have stats for cache gets and puts. My feeling is that the > > important stat to monitor is the access to common pool, because it is > > the one that highlights a possible performance impact (contention). > > The cache stats are more or less equal to "success + fail - common". > > Moreover, it will simplify the patch and avoid risks of mistakes. > > > > What do you think? Agree as well. Can you please add a comment making a note of this in the stats structure? > > Yes, I think the cache stats can be removed. > Also, please correct me if I’m wrong; but, in my understanding, the cache stats > are equal to “success - common”. Is adding “fail” required? > > > <snip>
- Add debug counters for objects put/get to/from the common pool. - Make __MEMPOOL_STAT_ADD() more generic --- v4: - Remove cache stats v3: - Add a patch to make stat add macro generic - Remove other stat add/subtract macros - Rename counters for better understanding - Add put/get cache bulk counters v2: - Fix typo in the commit message --- Dharmik Thakkar (1): lib/mempool: make stats macro generic Joyce Kong (1): lib/mempool: distinguish debug counters from cache and pool lib/mempool/rte_mempool.c | 16 ++++++++++ lib/mempool/rte_mempool.h | 67 +++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 27 deletions(-) -- 2.17.1
Make __MEMPOOL_STAT_ADD macro more generic and delete __MEMPOOL_CONTIG_BLOCKS_STAT_ADD macro Suggested-by: Olivier Matz <olivier.matz@6wind.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> --- lib/mempool/rte_mempool.h | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index c551cf733acf..848a19226149 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -273,20 +273,11 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do { \ unsigned __lcore_id = rte_lcore_id(); \ if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_objs += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ + mp->stats[__lcore_id].name += n; \ } \ } while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ - unsigned int __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_blks += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ - } \ - } while (0) #else #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) #endif /** @@ -1288,7 +1279,8 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, void **cache_objs; /* increment stat now, adding in mempool always success */ - __MEMPOOL_STAT_ADD(mp, put, n); + __MEMPOOL_STAT_ADD(mp, put_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_objs, n); /* No cache provided or if put would overflow mem allocated for cache */ if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) @@ -1446,7 +1438,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; - __MEMPOOL_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); return 0; @@ -1455,10 +1448,13 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, /* get remaining objects from ring */ ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n); - if (ret < 0) - __MEMPOOL_STAT_ADD(mp, get_fail, n); - else - __MEMPOOL_STAT_ADD(mp, get_success, n); + if (ret < 0) { + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); + } else { + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } return ret; } @@ -1581,11 +1577,13 @@ rte_mempool_get_contig_blocks(struct rte_mempool *mp, ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n); if (ret == 0) { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_blks, n); __mempool_contig_blocks_check_cookies(mp, first_obj_table, n, 1); } else { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_fail, n); + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_blks, n); } rte_mempool_trace_get_contig_blocks(mp, first_obj_table, n); -- 2.17.1
From: Joyce Kong <joyce.kong@arm.com> If cache is enabled, objects will be retrieved/put from/to cache, subsequently from/to the common pool. Now the debug stats calculate the objects retrieved/put from/to cache and pool together, it is better to distinguish them. Signed-off-by: Joyce Kong <joyce.kong@arm.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- lib/mempool/rte_mempool.c | 16 +++++++++++++++ lib/mempool/rte_mempool.h | 43 ++++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c index afb1239c8d48..e9343c2a7f6b 100644 --- a/lib/mempool/rte_mempool.c +++ b/lib/mempool/rte_mempool.c @@ -1244,6 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { sum.put_bulk += mp->stats[lcore_id].put_bulk; sum.put_objs += mp->stats[lcore_id].put_objs; + sum.put_common_pool_bulk += + mp->stats[lcore_id].put_common_pool_bulk; + sum.put_common_pool_objs += + mp->stats[lcore_id].put_common_pool_objs; + sum.get_common_pool_bulk += + mp->stats[lcore_id].get_common_pool_bulk; + sum.get_common_pool_objs += + mp->stats[lcore_id].get_common_pool_objs; sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; sum.get_success_objs += mp->stats[lcore_id].get_success_objs; sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; @@ -1254,6 +1262,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", + sum.put_common_pool_bulk); + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", + sum.put_common_pool_objs); + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", + sum.get_common_pool_bulk); + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", + sum.get_common_pool_objs); fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index 848a19226149..4343b287dc4e 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -64,14 +64,21 @@ extern "C" { #ifdef RTE_LIBRTE_MEMPOOL_DEBUG /** * A structure that stores the mempool statistics (per-lcore). + * Note: Cache stats (put_cache_bulk/objs, get_cache_bulk/objs) are not + * captured since they can be calculated from other stats. + * For example: put_cache_objs = put_objs - put_common_pool_objs. */ struct rte_mempool_debug_stats { - uint64_t put_bulk; /**< Number of puts. */ - uint64_t put_objs; /**< Number of objects successfully put. */ - uint64_t get_success_bulk; /**< Successful allocation number. */ - uint64_t get_success_objs; /**< Objects successfully allocated. */ - uint64_t get_fail_bulk; /**< Failed allocation number. */ - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ /** Successful allocation number of contiguous blocks. */ uint64_t get_success_blks; /** Failed allocation number of contiguous blocks. */ @@ -699,10 +706,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) { struct rte_mempool_ops *ops; + int ret; rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); - return ops->dequeue(mp, obj_table, n); + ret = ops->dequeue(mp, obj_table, n); + if (ret == 0) { + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } + return ret; } /** @@ -749,6 +764,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, { struct rte_mempool_ops *ops; + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); return ops->enqueue(mp, obj_table, n); @@ -1297,9 +1314,10 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, /* Add elements back into the cache */ rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); - cache->len += n; + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); + if (cache->len >= cache->flushthresh) { rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], cache->len - cache->size); @@ -1430,6 +1448,9 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, } cache->len += req; + } else { + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); } /* Now fill in the response ... */ @@ -1438,9 +1459,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); - return 0; ring_dequeue: @@ -1451,9 +1469,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, if (ret < 0) { __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); - } else { - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); } return ret; -- 2.17.1
On 21/04/2021 17:29, Olivier Matz wrote: > Hi Dharmik, > > Please see some comments below. > > On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote: >> From: Joyce Kong <joyce.kong@arm.com> >> >> If cache is enabled, objects will be retrieved/put from/to cache, >> subsequently from/to the common pool. Now the debug stats calculate >> the objects retrieved/put from/to cache and pool together, it is >> better to distinguish them. >> >> Signed-off-by: Joyce Kong <joyce.kong@arm.com> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> >> --- >> lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++ >> lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++---------- >> 2 files changed, 57 insertions(+), 14 deletions(-) >> >> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c >> index afb1239c8d48..339f14455624 100644 >> --- a/lib/librte_mempool/rte_mempool.c >> +++ b/lib/librte_mempool/rte_mempool.c >> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) >> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { >> sum.put_bulk += mp->stats[lcore_id].put_bulk; >> sum.put_objs += mp->stats[lcore_id].put_objs; >> + sum.put_common_pool_bulk += >> + mp->stats[lcore_id].put_common_pool_bulk; >> + sum.put_common_pool_objs += >> + mp->stats[lcore_id].put_common_pool_objs; >> + sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk; >> + sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs; >> + sum.get_common_pool_bulk += >> + mp->stats[lcore_id].get_common_pool_bulk; >> + sum.get_common_pool_objs += >> + mp->stats[lcore_id].get_common_pool_objs; >> + sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk; >> + sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs; >> sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; >> sum.get_success_objs += mp->stats[lcore_id].get_success_objs; >> sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; >> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) >> fprintf(f, " stats:\n"); >> fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); >> fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); >> + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", >> + sum.put_common_pool_bulk); >> + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", >> + sum.put_common_pool_objs); >> + fprintf(f, " put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk); >> + fprintf(f, " put_cache_objs=%"PRIu64"\n", sum.put_cache_objs); >> + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", >> + sum.get_common_pool_bulk); >> + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", >> + sum.get_common_pool_objs); >> + fprintf(f, " get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk); >> + fprintf(f, " get_cache_objs=%"PRIu64"\n", sum.get_cache_objs); >> fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); >> fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); >> fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); >> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h >> index 848a19226149..0959f8a3f367 100644 >> --- a/lib/librte_mempool/rte_mempool.h >> +++ b/lib/librte_mempool/rte_mempool.h >> @@ -66,12 +66,20 @@ extern "C" { >> * A structure that stores the mempool statistics (per-lcore). >> */ >> struct rte_mempool_debug_stats { >> - uint64_t put_bulk; /**< Number of puts. */ >> - uint64_t put_objs; /**< Number of objects successfully put. */ >> - uint64_t get_success_bulk; /**< Successful allocation number. */ >> - uint64_t get_success_objs; /**< Objects successfully allocated. */ >> - uint64_t get_fail_bulk; /**< Failed allocation number. */ >> - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ >> + uint64_t put_bulk; /**< Number of puts. */ >> + uint64_t put_objs; /**< Number of objects successfully put. */ >> + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ >> + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ >> + uint64_t put_cache_bulk; /**< Number of bulks enqueued in cache. */ >> + uint64_t put_cache_objs; /**< Number of objects enqueued in cache. */ >> + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ >> + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ >> + uint64_t get_cache_bulk; /**< Number of bulks dequeued from cache. */ >> + uint64_t get_cache_objs; /**< Number of objects dequeued from cache. */ >> + uint64_t get_success_bulk; /**< Successful allocation number. */ >> + uint64_t get_success_objs; /**< Objects successfully allocated. */ >> + uint64_t get_fail_bulk; /**< Failed allocation number. */ >> + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > > I missed it the first time, but this changes the size of the > rte_mempool_debug_stats structure. I think we don't care about this ABI > breakage because this structure is only defined if > RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc. Agreed, if it is just a debugging non-default feature. > About the field themselves, I'm not certain that there is an added value > to have stats for cache gets and puts. My feeling is that the important > stat to monitor is the access to common pool, because it is the one that > highlights a possible performance impact (contention). The cache stats > are more or less equal to "success + fail - common". Moreover, it will > simplify the patch and avoid risks of mistakes. > > What do you think? > >> /** Successful allocation number of contiguous blocks. */ >> uint64_t get_success_blks; >> /** Failed allocation number of contiguous blocks. */ >> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, >> void **obj_table, unsigned n) >> { >> struct rte_mempool_ops *ops; >> + int ret; >> >> rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); >> ops = rte_mempool_get_ops(mp->ops_index); >> - return ops->dequeue(mp, obj_table, n); >> + ret = ops->dequeue(mp, obj_table, n); >> + if (ret == 0) { >> + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); >> + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> + } >> + return ret; >> } >> >> /** >> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, >> { >> struct rte_mempool_ops *ops; >> >> + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); >> rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); >> ops = rte_mempool_get_ops(mp->ops_index); >> return ops->enqueue(mp, obj_table, n); >> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, >> >> /* Add elements back into the cache */ >> rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); >> - >> cache->len += n; >> >> + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); >> + >> if (cache->len >= cache->flushthresh) { >> + __MEMPOOL_STAT_ADD(mp, put_cache_objs, >> + n - (cache->len - cache->size)); >> rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], >> cache->len - cache->size); >> cache->len = cache->size; >> - } >> + } else >> + __MEMPOOL_STAT_ADD(mp, put_cache_objs, n); >> > > In case we keep cache stats, I'd add {} after the else to be consistent > with the if(). > >> return; >> >> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, >> >> cache->len -= n; >> >> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> + __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1); >> + __MEMPOOL_STAT_ADD(mp, get_cache_objs, n); > > In case we keep cache stats, I don't think we should remove get_success > stats increment. Else, the success stats will never be incremented when > retrieving objects from the cache. > > >> >> return 0; >> >> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, >> if (ret < 0) { >> __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); >> __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); >> - } else { >> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); >> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); >> } >> >> return ret; >> -- >> 2.17.1 >>
+Honnappa
> On Apr 22, 2021, at 8:29 PM, Dharmik Thakkar <Dharmik.Thakkar@arm.com> wrote:
>
> From: Joyce Kong <joyce.kong@arm.com>
>
> If cache is enabled, objects will be retrieved/put from/to cache,
> subsequently from/to the common pool. Now the debug stats calculate
> the objects retrieved/put from/to cache and pool together, it is
> better to distinguish them.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> lib/mempool/rte_mempool.c | 16 +++++++++++++++
> lib/mempool/rte_mempool.h | 43 ++++++++++++++++++++++++++-------------
> 2 files changed, 45 insertions(+), 14 deletions(-)
>
> diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
> index afb1239c8d48..e9343c2a7f6b 100644
> --- a/lib/mempool/rte_mempool.c
> +++ b/lib/mempool/rte_mempool.c
> @@ -1244,6 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> sum.put_bulk += mp->stats[lcore_id].put_bulk;
> sum.put_objs += mp->stats[lcore_id].put_objs;
> + sum.put_common_pool_bulk +=
> + mp->stats[lcore_id].put_common_pool_bulk;
> + sum.put_common_pool_objs +=
> + mp->stats[lcore_id].put_common_pool_objs;
> + sum.get_common_pool_bulk +=
> + mp->stats[lcore_id].get_common_pool_bulk;
> + sum.get_common_pool_objs +=
> + mp->stats[lcore_id].get_common_pool_objs;
> sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
> sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
> sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
> @@ -1254,6 +1262,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
> fprintf(f, " stats:\n");
> fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk);
> fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs);
> + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n",
> + sum.put_common_pool_bulk);
> + fprintf(f, " put_common_pool_objs=%"PRIu64"\n",
> + sum.put_common_pool_objs);
> + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n",
> + sum.get_common_pool_bulk);
> + fprintf(f, " get_common_pool_objs=%"PRIu64"\n",
> + sum.get_common_pool_objs);
> fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
> fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs);
> fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
> diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> index 848a19226149..4343b287dc4e 100644
> --- a/lib/mempool/rte_mempool.h
> +++ b/lib/mempool/rte_mempool.h
> @@ -64,14 +64,21 @@ extern "C" {
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> /**
> * A structure that stores the mempool statistics (per-lcore).
> + * Note: Cache stats (put_cache_bulk/objs, get_cache_bulk/objs) are not
> + * captured since they can be calculated from other stats.
> + * For example: put_cache_objs = put_objs - put_common_pool_objs.
> */
> struct rte_mempool_debug_stats {
> - uint64_t put_bulk; /**< Number of puts. */
> - uint64_t put_objs; /**< Number of objects successfully put. */
> - uint64_t get_success_bulk; /**< Successful allocation number. */
> - uint64_t get_success_objs; /**< Objects successfully allocated. */
> - uint64_t get_fail_bulk; /**< Failed allocation number. */
> - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */
> + uint64_t put_bulk; /**< Number of puts. */
> + uint64_t put_objs; /**< Number of objects successfully put. */
> + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */
> + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */
> + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */
> + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */
> + uint64_t get_success_bulk; /**< Successful allocation number. */
> + uint64_t get_success_objs; /**< Objects successfully allocated. */
> + uint64_t get_fail_bulk; /**< Failed allocation number. */
> + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */
> /** Successful allocation number of contiguous blocks. */
> uint64_t get_success_blks;
> /** Failed allocation number of contiguous blocks. */
> @@ -699,10 +706,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
> void **obj_table, unsigned n)
> {
> struct rte_mempool_ops *ops;
> + int ret;
>
> rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
> ops = rte_mempool_get_ops(mp->ops_index);
> - return ops->dequeue(mp, obj_table, n);
> + ret = ops->dequeue(mp, obj_table, n);
> + if (ret == 0) {
> + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
> + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
> + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> + __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> + }
> + return ret;
> }
>
> /**
> @@ -749,6 +764,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
> {
> struct rte_mempool_ops *ops;
>
> + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
> + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
> rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
> ops = rte_mempool_get_ops(mp->ops_index);
> return ops->enqueue(mp, obj_table, n);
> @@ -1297,9 +1314,10 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>
> /* Add elements back into the cache */
> rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> -
> cache->len += n;
>
> + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
> +
> if (cache->len >= cache->flushthresh) {
> rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
> cache->len - cache->size);
> @@ -1430,6 +1448,9 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
> }
>
> cache->len += req;
> + } else {
> + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> + __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> }
>
> /* Now fill in the response ... */
> @@ -1438,9 +1459,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>
> cache->len -= n;
>
> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> -
> return 0;
>
> ring_dequeue:
> @@ -1451,9 +1469,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
> if (ret < 0) {
> __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
> __MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
> - } else {
> - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> - __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> }
>
> return ret;
> --
> 2.17.1
>
Hi Dharmik, Few comments below. On Thu, Apr 22, 2021 at 08:29:38PM -0500, Dharmik Thakkar wrote: > From: Joyce Kong <joyce.kong@arm.com> > > If cache is enabled, objects will be retrieved/put from/to cache, > subsequently from/to the common pool. Now the debug stats calculate > the objects retrieved/put from/to cache and pool together, it is > better to distinguish them. > > Signed-off-by: Joyce Kong <joyce.kong@arm.com> > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > lib/mempool/rte_mempool.c | 16 +++++++++++++++ > lib/mempool/rte_mempool.h | 43 ++++++++++++++++++++++++++------------- > 2 files changed, 45 insertions(+), 14 deletions(-) > > diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c > index afb1239c8d48..e9343c2a7f6b 100644 > --- a/lib/mempool/rte_mempool.c > +++ b/lib/mempool/rte_mempool.c > @@ -1244,6 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { > sum.put_bulk += mp->stats[lcore_id].put_bulk; > sum.put_objs += mp->stats[lcore_id].put_objs; > + sum.put_common_pool_bulk += > + mp->stats[lcore_id].put_common_pool_bulk; > + sum.put_common_pool_objs += > + mp->stats[lcore_id].put_common_pool_objs; > + sum.get_common_pool_bulk += > + mp->stats[lcore_id].get_common_pool_bulk; > + sum.get_common_pool_objs += > + mp->stats[lcore_id].get_common_pool_objs; > sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; > sum.get_success_objs += mp->stats[lcore_id].get_success_objs; > sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; > @@ -1254,6 +1262,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) > fprintf(f, " stats:\n"); > fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); > fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); > + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", > + sum.put_common_pool_bulk); > + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", > + sum.put_common_pool_objs); > + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", > + sum.get_common_pool_bulk); > + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", > + sum.get_common_pool_objs); > fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); > fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); > fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); > diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h > index 848a19226149..4343b287dc4e 100644 > --- a/lib/mempool/rte_mempool.h > +++ b/lib/mempool/rte_mempool.h > @@ -64,14 +64,21 @@ extern "C" { > #ifdef RTE_LIBRTE_MEMPOOL_DEBUG > /** > * A structure that stores the mempool statistics (per-lcore). > + * Note: Cache stats (put_cache_bulk/objs, get_cache_bulk/objs) are not > + * captured since they can be calculated from other stats. > + * For example: put_cache_objs = put_objs - put_common_pool_objs. > */ > struct rte_mempool_debug_stats { > - uint64_t put_bulk; /**< Number of puts. */ > - uint64_t put_objs; /**< Number of objects successfully put. */ > - uint64_t get_success_bulk; /**< Successful allocation number. */ > - uint64_t get_success_objs; /**< Objects successfully allocated. */ > - uint64_t get_fail_bulk; /**< Failed allocation number. */ > - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > + uint64_t put_bulk; /**< Number of puts. */ > + uint64_t put_objs; /**< Number of objects successfully put. */ > + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ > + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ > + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ > + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ > + uint64_t get_success_bulk; /**< Successful allocation number. */ > + uint64_t get_success_objs; /**< Objects successfully allocated. */ > + uint64_t get_fail_bulk; /**< Failed allocation number. */ > + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ > /** Successful allocation number of contiguous blocks. */ > uint64_t get_success_blks; > /** Failed allocation number of contiguous blocks. */ > @@ -699,10 +706,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, > void **obj_table, unsigned n) > { > struct rte_mempool_ops *ops; > + int ret; > > rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); > ops = rte_mempool_get_ops(mp->ops_index); > - return ops->dequeue(mp, obj_table, n); > + ret = ops->dequeue(mp, obj_table, n); > + if (ret == 0) { > + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); > + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > + } > + return ret; > } I think we should only have the common_pool stats here, for 2 reasons: - more consistent with put() - in case we are called by __mempool_generic_get() for a "backfill" operation, the number of successes will not be incremented by the correct value (the "req" variable is != n) > > /** > @@ -749,6 +764,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, > { > struct rte_mempool_ops *ops; > > + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); > rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); > ops = rte_mempool_get_ops(mp->ops_index); > return ops->enqueue(mp, obj_table, n); > @@ -1297,9 +1314,10 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, > > /* Add elements back into the cache */ > rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n); > - > cache->len += n; > > + __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); > + This one was forgotten, there is a compilation error: In file included from ../lib/mempool/rte_mempool_ops_default.c:7: ../lib/mempool/rte_mempool.h: In function ‘__mempool_generic_put’: ../lib/mempool/rte_mempool.h:1319:25: error: ‘struct rte_mempool_debug_stats’ has no member named ‘put_cache_bulk’; did you mean ‘put_bulk’? __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1); ^~~~~~~~~~~~~~ ../lib/mempool/rte_mempool.h:283:26: note: in definition of macro ‘__MEMPOOL_STAT_ADD’ mp->stats[__lcore_id].name += n; \ ^~~~ > if (cache->len >= cache->flushthresh) { > rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size], > cache->len - cache->size); > @@ -1430,6 +1448,9 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > } > > cache->len += req; > + } else { > + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > } > > /* Now fill in the response ... */ > @@ -1438,9 +1459,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > > cache->len -= n; > > - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > - > return 0; > > ring_dequeue: > @@ -1451,9 +1469,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, > if (ret < 0) { > __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); > __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); > - } else { > - __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); > - __MEMPOOL_STAT_ADD(mp, get_success_objs, n); > } > > return ret; > -- > 2.17.1 >
On Thu, Apr 22, 2021 at 08:29:36PM -0500, Dharmik Thakkar wrote: > Subject: [dpdk-dev] [PATCH v4 0/2] lib/mempool: add debug stats I missed that one: please use "mempool:" instead of "lib/mempool:" prefix in the titles. Thanks, Olivier > > - Add debug counters for objects put/get to/from the common pool. > - Make __MEMPOOL_STAT_ADD() more generic > > --- > v4: > - Remove cache stats > > v3: > - Add a patch to make stat add macro generic > - Remove other stat add/subtract macros > - Rename counters for better understanding > - Add put/get cache bulk counters > > v2: > - Fix typo in the commit message > --- > > Dharmik Thakkar (1): > lib/mempool: make stats macro generic > > Joyce Kong (1): > lib/mempool: distinguish debug counters from cache and pool > > lib/mempool/rte_mempool.c | 16 ++++++++++ > lib/mempool/rte_mempool.h | 67 +++++++++++++++++++++++---------------- > 2 files changed, 56 insertions(+), 27 deletions(-) > > -- > 2.17.1 >
- Add debug counters for objects put/get to/from the common pool. - Make __MEMPOOL_STAT_ADD() more generic --- v5: - Fix compilation issue - Move 'increment get_success_bulk/objs' inside __mempool_generic_get v4: - Remove cache stats v3: - Add a patch to make stat add macro generic - Remove other stat add/subtract macros - Rename counters for better understanding - Add put/get cache bulk counters v2: - Fix typo in the commit message --- Dharmik Thakkar (1): mempool: make stats macro generic Joyce Kong (1): mempool: distinguish debug counters from cache and pool lib/mempool/rte_mempool.c | 16 ++++++++++ lib/mempool/rte_mempool.h | 63 +++++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 25 deletions(-) -- 2.17.1
Make __MEMPOOL_STAT_ADD macro more generic and delete __MEMPOOL_CONTIG_BLOCKS_STAT_ADD macro Suggested-by: Olivier Matz <olivier.matz@6wind.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> --- lib/mempool/rte_mempool.h | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index c551cf733acf..848a19226149 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -273,20 +273,11 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do { \ unsigned __lcore_id = rte_lcore_id(); \ if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_objs += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ + mp->stats[__lcore_id].name += n; \ } \ } while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \ - unsigned int __lcore_id = rte_lcore_id(); \ - if (__lcore_id < RTE_MAX_LCORE) { \ - mp->stats[__lcore_id].name##_blks += n; \ - mp->stats[__lcore_id].name##_bulk += 1; \ - } \ - } while (0) #else #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0) #endif /** @@ -1288,7 +1279,8 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table, void **cache_objs; /* increment stat now, adding in mempool always success */ - __MEMPOOL_STAT_ADD(mp, put, n); + __MEMPOOL_STAT_ADD(mp, put_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_objs, n); /* No cache provided or if put would overflow mem allocated for cache */ if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE)) @@ -1446,7 +1438,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, cache->len -= n; - __MEMPOOL_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); return 0; @@ -1455,10 +1448,13 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, /* get remaining objects from ring */ ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n); - if (ret < 0) - __MEMPOOL_STAT_ADD(mp, get_fail, n); - else - __MEMPOOL_STAT_ADD(mp, get_success, n); + if (ret < 0) { + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_objs, n); + } else { + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } return ret; } @@ -1581,11 +1577,13 @@ rte_mempool_get_contig_blocks(struct rte_mempool *mp, ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n); if (ret == 0) { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_success, n); + __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_success_blks, n); __mempool_contig_blocks_check_cookies(mp, first_obj_table, n, 1); } else { - __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_fail, n); + __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_fail_blks, n); } rte_mempool_trace_get_contig_blocks(mp, first_obj_table, n); -- 2.17.1
From: Joyce Kong <joyce.kong@arm.com> If cache is enabled, objects will be retrieved/put from/to cache, subsequently from/to the common pool. Now the debug stats calculate the objects retrieved/put from/to cache and pool together, it is better to distinguish them. Signed-off-by: Joyce Kong <joyce.kong@arm.com> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- lib/mempool/rte_mempool.c | 16 ++++++++++++++++ lib/mempool/rte_mempool.h | 29 ++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c index afb1239c8d48..e9343c2a7f6b 100644 --- a/lib/mempool/rte_mempool.c +++ b/lib/mempool/rte_mempool.c @@ -1244,6 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { sum.put_bulk += mp->stats[lcore_id].put_bulk; sum.put_objs += mp->stats[lcore_id].put_objs; + sum.put_common_pool_bulk += + mp->stats[lcore_id].put_common_pool_bulk; + sum.put_common_pool_objs += + mp->stats[lcore_id].put_common_pool_objs; + sum.get_common_pool_bulk += + mp->stats[lcore_id].get_common_pool_bulk; + sum.get_common_pool_objs += + mp->stats[lcore_id].get_common_pool_objs; sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; sum.get_success_objs += mp->stats[lcore_id].get_success_objs; sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; @@ -1254,6 +1262,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " put_common_pool_bulk=%"PRIu64"\n", + sum.put_common_pool_bulk); + fprintf(f, " put_common_pool_objs=%"PRIu64"\n", + sum.put_common_pool_objs); + fprintf(f, " get_common_pool_bulk=%"PRIu64"\n", + sum.get_common_pool_bulk); + fprintf(f, " get_common_pool_objs=%"PRIu64"\n", + sum.get_common_pool_objs); fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index 848a19226149..7089baab31e3 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -64,14 +64,21 @@ extern "C" { #ifdef RTE_LIBRTE_MEMPOOL_DEBUG /** * A structure that stores the mempool statistics (per-lcore). + * Note: Cache stats (put_cache_bulk/objs, get_cache_bulk/objs) are not + * captured since they can be calculated from other stats. + * For example: put_cache_objs = put_objs - put_common_pool_objs. */ struct rte_mempool_debug_stats { - uint64_t put_bulk; /**< Number of puts. */ - uint64_t put_objs; /**< Number of objects successfully put. */ - uint64_t get_success_bulk; /**< Successful allocation number. */ - uint64_t get_success_objs; /**< Objects successfully allocated. */ - uint64_t get_fail_bulk; /**< Failed allocation number. */ - uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */ + uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */ + uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */ + uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ /** Successful allocation number of contiguous blocks. */ uint64_t get_success_blks; /** Failed allocation number of contiguous blocks. */ @@ -699,10 +706,16 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) { struct rte_mempool_ops *ops; + int ret; rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); - return ops->dequeue(mp, obj_table, n); + ret = ops->dequeue(mp, obj_table, n); + if (ret == 0) { + __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n); + } + return ret; } /** @@ -749,6 +762,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table, { struct rte_mempool_ops *ops; + __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1); + __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n); rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n); ops = rte_mempool_get_ops(mp->ops_index); return ops->enqueue(mp, obj_table, n); -- 2.17.1
On Tue, Apr 27, 2021 at 11:01:40AM -0500, Dharmik Thakkar wrote:
> From: Joyce Kong <joyce.kong@arm.com>
>
> If cache is enabled, objects will be retrieved/put from/to cache,
> subsequently from/to the common pool. Now the debug stats calculate
> the objects retrieved/put from/to cache and pool together, it is
> better to distinguish them.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
On Tue, Apr 27, 2021 at 6:02 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote:
>
> - Add debug counters for objects put/get to/from the common pool.
> - Make __MEMPOOL_STAT_ADD() more generic
>
Series applied, thanks.
--
David Marchand