From mboxrd@z Thu Jan  1 00:00:00 1970
From: Bruce Richardson <bruce.richardson@intel.com>
To: olivier.matz@6wind.com
Cc: thomas.monjalon@6wind.com, dev@dpdk.org,
	Bruce Richardson <bruce.richardson@intel.com>
Date: Tue, 28 Mar 2017 21:36:04 +0100
Message-Id: <20170328203606.27457-13-bruce.richardson@intel.com>
X-Mailer: git-send-email 2.8.4
In-Reply-To: <20170328203606.27457-1-bruce.richardson@intel.com>
References: <20170324171008.29355-1-bruce.richardson@intel.com>
 <20170328203606.27457-1-bruce.richardson@intel.com>
Subject: [dpdk-dev] [PATCH v4 12/14] ring: separate out head index manipulation for enq/deq

We can write a single common function for head manipulation for enq
and a common one for deq, allowing us to have a single worker function
for enq and deq, rather than two of each. Update all other inline
functions to use the new functions.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>

---
V3: renamed parameter "is_mp" in __rte_ring_do_dequeue to the correct
    "is_sc"
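
[Editor's note, not part of the patch: the sketch below illustrates the
refactored control flow in miniature -- one "move head" helper plus one
common worker that copies entries and publishes the tail. It is a
simplified, single-threaded model using hypothetical names (toy_ring,
toy_move_prod_head, toy_enqueue), not the real rte_ring API; the real
multi-producer path adds the compare-and-set retry loop, the write
barrier, and the wait on prod.tail shown in the diff.]

  #include <stdint.h>
  #include <stdio.h>

  /* Toy ring: power-of-two size, free-running 32-bit indices. */
  struct toy_ring {
          uint32_t mask;        /* size - 1, size is a power of two */
          uint32_t prod_head;   /* where the next enqueue will start */
          uint32_t prod_tail;   /* entries up to here are visible */
          uint32_t cons_tail;   /* consumer progress */
          void *slots[8];
  };

  /* Step 1: move the producer head (single-producer case only, so a
   * plain store replaces the cmpset loop of the multi-producer path). */
  static unsigned int
  toy_move_prod_head(struct toy_ring *r, unsigned int n,
                  uint32_t *old_head, uint32_t *new_head)
  {
          /* Unsigned wrap-around keeps this correct even after the
           * indices pass UINT32_MAX; result is in [0, size-1]. */
          uint32_t free_entries = r->mask + r->cons_tail - r->prod_head;

          if (n > free_entries)   /* "variable" behavior: take less */
                  n = free_entries;
          *old_head = r->prod_head;
          *new_head = *old_head + n;
          r->prod_head = *new_head;
          return n;
  }

  /* Step 2: the common worker: move head, copy objects, publish tail. */
  static unsigned int
  toy_enqueue(struct toy_ring *r, void * const *objs, unsigned int n)
  {
          uint32_t old_head, new_head;
          unsigned int i;

          n = toy_move_prod_head(r, n, &old_head, &new_head);
          for (i = 0; i < n; i++)
                  r->slots[(old_head + i) & r->mask] = objs[i];
          /* An MP ring would issue rte_smp_wmb() and wait for
           * prod_tail == old_head before this store. */
          r->prod_tail = new_head;
          return n;
  }

  int
  main(void)
  {
          struct toy_ring r = { .mask = 7 };
          void *objs[3] = { (void *)1, (void *)2, (void *)3 };

          printf("enqueued %u\n", toy_enqueue(&r, objs, 3)); /* 3 */
          return 0;
  }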
---
 lib/librte_ring/rte_ring.c |   4 +-
 lib/librte_ring/rte_ring.h | 328 ++++++++++++++++++++-------------------------
 2 files changed, 149 insertions(+), 183 deletions(-)

diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
index 25f64f0..5f98c33 100644
--- a/lib/librte_ring/rte_ring.c
+++ b/lib/librte_ring/rte_ring.c
@@ -138,8 +138,8 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
 	if (ret < 0 || ret >= (int)sizeof(r->name))
 		return -ENAMETOOLONG;
 	r->flags = flags;
-	r->prod.single = !!(flags & RING_F_SP_ENQ);
-	r->cons.single = !!(flags & RING_F_SC_DEQ);
+	r->prod.single = (flags & RING_F_SP_ENQ) ? __IS_SP : __IS_MP;
+	r->cons.single = (flags & RING_F_SC_DEQ) ? __IS_SC : __IS_MC;
 	r->size = count;
 	r->mask = count - 1;
 	r->prod.head = r->cons.head = 0;
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index e801510..3d8f738 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -169,6 +169,12 @@ struct rte_ring {
 #define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
 #define RTE_RING_SZ_MASK  (unsigned)(0x0fffffff) /**< Ring size mask */
 
+/* @internal defines for passing to the enqueue dequeue worker functions */
+#define __IS_SP 1
+#define __IS_MP 0
+#define __IS_SC 1
+#define __IS_MC 0
+
 /**
  * Calculate the memory size needed for a ring
  *
@@ -287,7 +293,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
 #define ENQUEUE_PTRS() do { \
 	unsigned int i; \
 	const uint32_t size = r->size; \
-	uint32_t idx = prod_head & mask; \
+	uint32_t idx = prod_head & r->mask; \
 	if (likely(idx + n < size)) { \
 		for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \
 			r->ring[idx] = obj_table[i]; \
@@ -313,7 +319,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
  * single and multi consumer dequeue functions */
 #define DEQUEUE_PTRS() do { \
 	unsigned int i; \
-	uint32_t idx = cons_head & mask; \
+	uint32_t idx = cons_head & r->mask; \
 	const uint32_t size = r->size; \
 	if (likely(idx + n < size)) { \
 		for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\
@@ -336,83 +342,72 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
 } while (0)
 
 /**
- * @internal Enqueue several objects on the ring (multi-producers safe).
- *
- * This function uses a "compare and set" instruction to move the
- * producer index atomically.
+ * @internal This function updates the producer head for enqueue
  *
  * @param r
- *   A pointer to the ring structure.
- * @param obj_table
- *   A pointer to a table of void * pointers (objects).
+ *   A pointer to the ring structure
+ * @param is_sp
+ *   Indicates whether multi-producer path is needed or not
  * @param n
- *   The number of objects to add in the ring from the obj_table.
+ *   The number of elements we will want to enqueue, i.e. how far should the
+ *   head be moved
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items from a ring
- *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible from ring
+ * @param old_head
+ *   Returns head value as it was before the move, i.e. where enqueue starts
+ * @param new_head
+ *   Returns the current/new head value i.e. where enqueue finishes
+ * @param free_entries
+ *   Returns the amount of free space in the ring BEFORE head was moved
  * @return
  *   Actual number of objects enqueued.
  *   If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
  */
-static inline unsigned int __attribute__((always_inline))
-__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
-			 unsigned int n, enum rte_ring_queue_behavior behavior,
-			 unsigned int *free_space)
+static inline __attribute__((always_inline)) unsigned int
+__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
+		unsigned int n, enum rte_ring_queue_behavior behavior,
+		uint32_t *old_head, uint32_t *new_head,
+		uint32_t *free_entries)
 {
-	uint32_t prod_head, prod_next;
-	uint32_t cons_tail, free_entries;
-	const unsigned int max = n;
+	const uint32_t mask = r->mask;
+	unsigned int max = n;
 	int success;
-	uint32_t mask = r->mask;
 
-	/* move prod.head atomically */
 	do {
 		/* Reset n to the initial burst count */
 		n = max;
 
-		prod_head = r->prod.head;
-		cons_tail = r->cons.tail;
+		*old_head = r->prod.head;
+		const uint32_t cons_tail = r->cons.tail;
 		/* The subtraction is done between two unsigned 32bits value
 		 * (the result is always modulo 32 bits even if we have
-		 * prod_head > cons_tail). So 'free_entries' is always between 0
+		 * *old_head > cons_tail). So 'free_entries' is always between 0
 		 * and size(ring)-1.
 		 */
-		free_entries = (mask + cons_tail - prod_head);
+		*free_entries = (mask + cons_tail - *old_head);
 
 		/* check that we have enough room in ring */
-		if (unlikely(n > free_entries))
+		if (unlikely(n > *free_entries))
 			n = (behavior == RTE_RING_QUEUE_FIXED) ?
-					0 : free_entries;
+					0 : *free_entries;
 
 		if (n == 0)
-			goto end;
-
-		prod_next = prod_head + n;
-		success = rte_atomic32_cmpset(&r->prod.head, prod_head,
-				prod_next);
+			return 0;
+
+		*new_head = *old_head + n;
+		if (is_sp)
+			r->prod.head = *new_head, success = 1;
+		else
+			success = rte_atomic32_cmpset(&r->prod.head,
+					*old_head, *new_head);
 	} while (unlikely(success == 0));
-
-	/* write entries in ring */
-	ENQUEUE_PTRS();
-	rte_smp_wmb();
-
-	/*
-	 * If there are other enqueues in progress that preceded us,
-	 * we need to wait for them to complete
-	 */
-	while (unlikely(r->prod.tail != prod_head))
-		rte_pause();
-
-	r->prod.tail = prod_next;
-end:
-	if (free_space != NULL)
-		*free_space = free_entries - n;
 	return n;
 }
 
 /**
- * @internal Enqueue several objects on a ring (NOT multi-producers safe).
+ * @internal Enqueue several objects on the ring
  *
- * @param r 
+ * @param r
  *   A pointer to the ring structure.
  * @param obj_table
  *   A pointer to a table of void * pointers (objects).
@@ -420,44 +415,40 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
  *   The number of objects to add in the ring from the obj_table.
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items from a ring
- *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible from ring
+ * @param is_sp
+ *   Indicates whether to use single producer or multi-producer head update
+ * @param free_space
+ *   returns the amount of space after the enqueue operation has finished
  * @return
  *   Actual number of objects enqueued.
  *   If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
  */
-static inline unsigned int __attribute__((always_inline))
-__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
-			 unsigned int n, enum rte_ring_queue_behavior behavior,
-			 unsigned int *free_space)
+static inline __attribute__((always_inline)) unsigned int
+__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table,
+		 unsigned int n, enum rte_ring_queue_behavior behavior,
+		 int is_sp, unsigned int *free_space)
 {
-	uint32_t prod_head, cons_tail;
-	uint32_t prod_next, free_entries;
-	uint32_t mask = r->mask;
-
-	prod_head = r->prod.head;
-	cons_tail = r->cons.tail;
-	/* The subtraction is done between two unsigned 32bits value
-	 * (the result is always modulo 32 bits even if we have
-	 * prod_head > cons_tail). So 'free_entries' is always between 0
-	 * and size(ring)-1. */
-	free_entries = mask + cons_tail - prod_head;
-
-	/* check that we have enough room in ring */
-	if (unlikely(n > free_entries))
-		n = (behavior == RTE_RING_QUEUE_FIXED) ?
-				0 : free_entries;
+	uint32_t prod_head, prod_next;
+	uint32_t free_entries;
 
+	n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
+			&prod_head, &prod_next, &free_entries);
 	if (n == 0)
 		goto end;
-
-	prod_next = prod_head + n;
-	r->prod.head = prod_next;
-
-	/* write entries in ring */
+
 	ENQUEUE_PTRS();
 	rte_smp_wmb();
 
+	/*
+	 * If there are other enqueues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->prod.tail != prod_head))
+		rte_pause();
+
 	r->prod.tail = prod_next;
+
 end:
 	if (free_space != NULL)
 		*free_space = free_entries - n;
@@ -465,130 +456,112 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
 }
 
 /**
- * @internal Dequeue several objects from a ring (multi-consumers safe). When
- * the request objects are more than the available objects, only dequeue the
- * actual number of objects
- *
- * This function uses a "compare and set" instruction to move the
- * consumer index atomically.
+ * @internal This function updates the consumer head for dequeue
  *
  * @param r
- *   A pointer to the ring structure.
- * @param obj_table
- *   A pointer to a table of void * pointers (objects) that will be filled.
+ *   A pointer to the ring structure
+ * @param is_sc
+ *   Indicates whether multi-consumer path is needed or not
 * @param n
- *   The number of objects to dequeue from the ring to the obj_table.
+ *   The number of elements we will want to enqueue, i.e. how far should the
+ *   head be moved
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
- *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
+ * @param old_head
+ *   Returns head value as it was before the move, i.e. where dequeue starts
+ * @param new_head
+ *   Returns the current/new head value i.e. where dequeue finishes
+ * @param entries
+ *   Returns the number of entries in the ring BEFORE head was moved
 * @return
 *   - Actual number of objects dequeued.
 *     If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
 */
-
-static inline unsigned int __attribute__((always_inline))
-__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
-		 unsigned int n, enum rte_ring_queue_behavior behavior,
-		 unsigned int *available)
+static inline __attribute__((always_inline)) unsigned int
+__rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
+		unsigned int n, enum rte_ring_queue_behavior behavior,
+		uint32_t *old_head, uint32_t *new_head,
+		uint32_t *entries)
 {
-	uint32_t cons_head, prod_tail;
-	uint32_t cons_next, entries;
-	const unsigned max = n;
+	unsigned int max = n;
 	int success;
-	uint32_t mask = r->mask;
 
 	/* move cons.head atomically */
 	do {
 		/* Restore n as it may change every loop */
 		n = max;
 
-		cons_head = r->cons.head;
-		prod_tail = r->prod.tail;
+		*old_head = r->cons.head;
+		const uint32_t prod_tail = r->prod.tail;
 		/* The subtraction is done between two unsigned 32bits value
 		 * (the result is always modulo 32 bits even if we have
 		 * cons_head > prod_tail). So 'entries' is always between 0
 		 * and size(ring)-1. */
-		entries = (prod_tail - cons_head);
+		*entries = (prod_tail - *old_head);
 
 		/* Set the actual entries for dequeue */
-		if (n > entries)
-			n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : entries;
+		if (n > *entries)
+			n = (behavior == RTE_RING_QUEUE_FIXED) ?
+					0 : *entries;
 
 		if (unlikely(n == 0))
-			goto end;
-
-		cons_next = cons_head + n;
-		success = rte_atomic32_cmpset(&r->cons.head, cons_head,
-				cons_next);
+			return 0;
+
+		*new_head = *old_head + n;
+		if (is_sc)
+			r->cons.head = *new_head, success = 1;
+		else
+			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
+					*new_head);
 	} while (unlikely(success == 0));
-
-	/* copy in table */
-	DEQUEUE_PTRS();
-	rte_smp_rmb();
-
-	/*
-	 * If there are other dequeues in progress that preceded us,
-	 * we need to wait for them to complete
-	 */
-	while (unlikely(r->cons.tail != cons_head))
-		rte_pause();
-
-	r->cons.tail = cons_next;
-end:
-	if (available != NULL)
-		*available = entries - n;
 	return n;
 }
 
 /**
- * @internal Dequeue several objects from a ring (NOT multi-consumers safe).
- * When the request objects are more than the available objects, only dequeue
- * the actual number of objects
+ * @internal Dequeue several objects from the ring
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
- *   A pointer to a table of void * pointers (objects) that will be filled.
+ *   A pointer to a table of void * pointers (objects).
 * @param n
- *   The number of objects to dequeue from the ring to the obj_table.
+ *   The number of objects to pull from the ring.
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
- *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
+ * @param is_sc
+ *   Indicates whether to use single consumer or multi-consumer head update
+ * @param available
+ *   returns the number of remaining ring entries after the dequeue has finished
 * @return
 *   - Actual number of objects dequeued.
 *     If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
 */
-static inline unsigned int __attribute__((always_inline))
-__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
+static inline __attribute__((always_inline)) unsigned int
+__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
 		 unsigned int n, enum rte_ring_queue_behavior behavior,
-		 unsigned int *available)
+		 int is_sc, unsigned int *available)
 {
-	uint32_t cons_head, prod_tail;
-	uint32_t cons_next, entries;
-	uint32_t mask = r->mask;
-
-	cons_head = r->cons.head;
-	prod_tail = r->prod.tail;
-	/* The subtraction is done between two unsigned 32bits value
-	 * (the result is always modulo 32 bits even if we have
-	 * cons_head > prod_tail). So 'entries' is always between 0
-	 * and size(ring)-1. */
-	entries = prod_tail - cons_head;
-
-	if (n > entries)
-		n = (behavior == RTE_RING_QUEUE_FIXED) ?
-				0 : entries;
-
-	if (unlikely(entries == 0))
-		goto end;
+	uint32_t cons_head, cons_next;
+	uint32_t entries;
 
-	cons_next = cons_head + n;
-	r->cons.head = cons_next;
+	n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+			&cons_head, &cons_next, &entries);
+	if (n == 0)
+		goto end;
 
-	/* copy in table */
 	DEQUEUE_PTRS();
 	rte_smp_rmb();
 
+	/*
+	 * If there are other enqueues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->cons.tail != cons_head))
+		rte_pause();
+
 	r->cons.tail = cons_next;
+
 end:
 	if (available != NULL)
 		*available = entries - n;
@@ -614,8 +587,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
 			 unsigned int n, unsigned int *free_space)
 {
-	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
-			free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			__IS_MP, free_space);
 }
 
 /**
@@ -634,8 +607,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
 			 unsigned int n, unsigned int *free_space)
 {
-	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
-			free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			__IS_SP, free_space);
 }
 
 /**
@@ -658,10 +631,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
 		      unsigned int n, unsigned int *free_space)
 {
-	if (r->prod.single)
-		return rte_ring_sp_enqueue_bulk(r, obj_table, n, free_space);
-	else
-		return rte_ring_mp_enqueue_bulk(r, obj_table, n, free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			r->prod.single, free_space);
 }
 
 /**
@@ -741,8 +712,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
-			available);
+	return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			__IS_MC, available);
 }
 
 /**
@@ -762,8 +733,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
-			available);
+	return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			__IS_SC, available);
 }
 
 /**
@@ -786,10 +757,8 @@ static inline unsigned int __attribute__((always_inline))
 rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	if (r->cons.single)
-		return rte_ring_sc_dequeue_bulk(r, obj_table, n, available);
-	else
-		return rte_ring_mc_dequeue_bulk(r, obj_table, n, available);
+	return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED,
+			r->cons.single, available);
 }
 
 /**
@@ -972,8 +941,8 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
 			 unsigned int n, unsigned int *free_space)
 {
-	return __rte_ring_mp_do_enqueue(r, obj_table, n,
-			RTE_RING_QUEUE_VARIABLE, free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n,
+			RTE_RING_QUEUE_VARIABLE, __IS_MP, free_space);
 }
 
 /**
@@ -992,8 +961,8 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
 			 unsigned int n, unsigned int *free_space)
 {
-	return __rte_ring_sp_do_enqueue(r, obj_table, n,
-			RTE_RING_QUEUE_VARIABLE, free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n,
+			RTE_RING_QUEUE_VARIABLE, __IS_SP, free_space);
 }
 
 /**
@@ -1016,10 +985,8 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
 		      unsigned int n, unsigned int *free_space)
 {
-	if (r->prod.single)
-		return rte_ring_sp_enqueue_burst(r, obj_table, n, free_space);
-	else
-		return rte_ring_mp_enqueue_burst(r, obj_table, n, free_space);
+	return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE,
+			r->prod.single, free_space);
 }
 
 /**
@@ -1043,8 +1010,8 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	return __rte_ring_mc_do_dequeue(r, obj_table, n,
-			RTE_RING_QUEUE_VARIABLE, available);
+	return __rte_ring_do_dequeue(r, obj_table, n,
+			RTE_RING_QUEUE_VARIABLE, __IS_MC, available);
 }
 
 /**
@@ -1065,8 +1032,8 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	return __rte_ring_sc_do_dequeue(r, obj_table, n,
-			RTE_RING_QUEUE_VARIABLE, available);
+	return __rte_ring_do_dequeue(r, obj_table, n,
+			RTE_RING_QUEUE_VARIABLE, __IS_SC, available);
 }
 
 /**
@@ -1089,10 +1056,9 @@ static inline unsigned __attribute__((always_inline))
 rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table,
 		unsigned int n, unsigned int *available)
 {
-	if (r->cons.single)
-		return rte_ring_sc_dequeue_burst(r, obj_table, n, available);
-	else
-		return rte_ring_mc_dequeue_burst(r, obj_table, n, available);
+	return __rte_ring_do_dequeue(r, obj_table, n,
+			RTE_RING_QUEUE_VARIABLE,
+			r->cons.single, available);
 }
 
 #ifdef __cplusplus
-- 
2.9.3