From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4DFACA3201 for ; Mon, 21 Oct 2019 02:24:30 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 9D81E2BCE; Mon, 21 Oct 2019 02:23:52 +0200 (CEST) Received: from foss.arm.com (unknown [217.140.110.172]) by dpdk.org (Postfix) with ESMTP id 6C14D2951 for ; Mon, 21 Oct 2019 02:23:46 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 0D8E11007; Sun, 20 Oct 2019 17:23:35 -0700 (PDT) Received: from qc2400f-1.austin.arm.com (qc2400f-1.austin.arm.com [10.118.12.34]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id EAB603F71F; Sun, 20 Oct 2019 17:23:34 -0700 (PDT) From: Honnappa Nagarahalli To: olivier.matz@6wind.com, sthemmin@microsoft.com, jerinj@marvell.com, bruce.richardson@intel.com, david.marchand@redhat.com, pbhagavatula@marvell.com, konstantin.ananyev@intel.com, drc@linux.vnet.ibm.com, hemant.agrawal@nxp.com, honnappa.nagarahalli@arm.com Cc: dev@dpdk.org, dharmik.thakkar@arm.com, ruifeng.wang@arm.com, gavin.hu@arm.com Date: Sun, 20 Oct 2019 19:22:59 -0500 Message-Id: <20191021002300.26497-6-honnappa.nagarahalli@arm.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20191021002300.26497-1-honnappa.nagarahalli@arm.com> References: <20190906190510.11146-1-honnappa.nagarahalli@arm.com> <20191021002300.26497-1-honnappa.nagarahalli@arm.com> Subject: [dpdk-dev] [RFC v6 5/6] lib/ring: copy ring elements using memcpy partially X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Ring elements are copied with memcpy in 32B chunks; any remaining bytes are copied using 32-bit assignments. When the copy wraps around the end of the ring, only assignments are used. The check that restricted esize to 4, 8 or 16 is removed, since the generic copy macros replace the per-size ones.
Signed-off-by: Honnappa Nagarahalli --- lib/librte_ring/rte_ring.c | 10 -- lib/librte_ring/rte_ring_elem.h | 229 +++++++------------------------- 2 files changed, 49 insertions(+), 190 deletions(-) diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c index e95285259..0f7f4b598 100644 --- a/lib/librte_ring/rte_ring.c +++ b/lib/librte_ring/rte_ring.c @@ -51,16 +51,6 @@ rte_ring_get_memsize_elem(unsigned count, unsigned esize) { ssize_t sz; - /* Supported esize values are 4/8/16. - * Others can be added on need basis. - */ - if (esize != 4 && esize != 8 && esize != 16) { - RTE_LOG(ERR, RING, - "Unsupported esize value. Supported values are 4, 8 and 16\n"); - - return -EINVAL; - } - /* count must be a power of 2 */ if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) { RTE_LOG(ERR, RING, diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h index 7e9914567..0ce5f2be7 100644 --- a/lib/librte_ring/rte_ring_elem.h +++ b/lib/librte_ring/rte_ring_elem.h @@ -24,6 +24,7 @@ extern "C" { #include #include #include +#include #include #include #include @@ -108,215 +109,83 @@ __rte_experimental struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count, unsigned int esize, int socket_id, unsigned int flags); -/* the actual enqueue of pointers on the ring. - * Placed here since identical code needed in both - * single and multi producer enqueue functions. 
- */ -#define ENQUEUE_PTRS_ELEM(r, ring_start, prod_head, obj_table, esize, n) do { \ - if (esize == 4) \ - ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n); \ - else if (esize == 8) \ - ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n); \ - else if (esize == 16) \ - ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n); \ -} while (0) - -#define ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n) do { \ - unsigned int i; \ +#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \ + unsigned int i, j; \ const uint32_t size = (r)->size; \ uint32_t idx = prod_head & (r)->mask; \ uint32_t *ring = (uint32_t *)ring_start; \ uint32_t *obj = (uint32_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & ((~(uint32_t)0x7))); i += 8, idx += 8) { \ - ring[idx] = obj[i]; \ - ring[idx + 1] = obj[i + 1]; \ - ring[idx + 2] = obj[i + 2]; \ - ring[idx + 3] = obj[i + 3]; \ - ring[idx + 4] = obj[i + 4]; \ - ring[idx + 5] = obj[i + 5]; \ - ring[idx + 6] = obj[i + 6]; \ - ring[idx + 7] = obj[i + 7]; \ + uint32_t nr_n = n * (esize / sizeof(uint32_t)); \ + uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \ + uint32_t seg0 = size - idx; \ + if (likely(n < seg0)) { \ + for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \ + i += 8, nr_idx += 8) { \ + memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \ } \ - switch (n & 0x7) { \ + switch (nr_n & 0x7) { \ case 7: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 6: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 5: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 4: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 3: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 2: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ - 
case 1: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++)\ - ring[idx] = obj[i]; \ - for (idx = 0; i < n; i++, idx++) \ - ring[idx] = obj[i]; \ - } \ -} while (0) - -#define ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n) do { \ - unsigned int i; \ - const uint32_t size = (r)->size; \ - uint32_t idx = prod_head & (r)->mask; \ - uint64_t *ring = (uint64_t *)ring_start; \ - uint64_t *obj = (uint64_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & ((~(uint32_t)0x3))); i += 4, idx += 4) { \ - ring[idx] = obj[i]; \ - ring[idx + 1] = obj[i + 1]; \ - ring[idx + 2] = obj[i + 2]; \ - ring[idx + 3] = obj[i + 3]; \ - } \ - switch (n & 0x3) { \ - case 3: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ - case 2: \ - ring[idx++] = obj[i++]; /* fallthrough */ \ - case 1: \ - ring[idx++] = obj[i++]; \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++)\ - ring[idx] = obj[i]; \ - for (idx = 0; i < n; i++, idx++) \ - ring[idx] = obj[i]; \ - } \ -} while (0) - -#define ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n) do { \ - unsigned int i; \ - const uint32_t size = (r)->size; \ - uint32_t idx = prod_head & (r)->mask; \ - __uint128_t *ring = (__uint128_t *)ring_start; \ - __uint128_t *obj = (__uint128_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n >> 1); i += 2, idx += 2) { \ - ring[idx] = obj[i]; \ - ring[idx + 1] = obj[i + 1]; \ - } \ - switch (n & 0x1) { \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ case 1: \ - ring[idx++] = obj[i++]; \ + ring[nr_idx++] = obj[i++]; /* fallthrough */ \ } \ } else { \ - for (i = 0; idx < size; i++, idx++)\ - ring[idx] = obj[i]; \ - for (idx = 0; i < n; i++, idx++) \ - ring[idx] = obj[i]; \ + uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \ + uint32_t nr_seg1 = nr_n - nr_seg0; \ + for (i = 0; i < nr_seg0; i++, nr_idx++)\ + ring[nr_idx] = obj[i]; \ + for (j = 0; j < nr_seg1; i++, j++) \ + ring[j] = obj[i]; \ } \ } 
while (0) -/* the actual copy of pointers on the ring to obj_table. - * Placed here since identical code needed in both - * single and multi consumer dequeue functions. - */ -#define DEQUEUE_PTRS_ELEM(r, ring_start, cons_head, obj_table, esize, n) do { \ - if (esize == 4) \ - DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n); \ - else if (esize == 8) \ - DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n); \ - else if (esize == 16) \ - DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n); \ -} while (0) - -#define DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n) do { \ - unsigned int i; \ +#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \ + unsigned int i, j; \ uint32_t idx = cons_head & (r)->mask; \ const uint32_t size = (r)->size; \ uint32_t *ring = (uint32_t *)ring_start; \ uint32_t *obj = (uint32_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & (~(uint32_t)0x7)); i += 8, idx += 8) {\ - obj[i] = ring[idx]; \ - obj[i + 1] = ring[idx + 1]; \ - obj[i + 2] = ring[idx + 2]; \ - obj[i + 3] = ring[idx + 3]; \ - obj[i + 4] = ring[idx + 4]; \ - obj[i + 5] = ring[idx + 5]; \ - obj[i + 6] = ring[idx + 6]; \ - obj[i + 7] = ring[idx + 7]; \ + uint32_t nr_n = n * (esize / sizeof(uint32_t)); \ + uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \ + uint32_t seg0 = size - idx; \ + if (likely(n < seg0)) { \ + for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \ + i += 8, nr_idx += 8) { \ + memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \ } \ - switch (n & 0x7) { \ + switch (nr_n & 0x7) { \ case 7: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ case 6: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ case 5: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ case 4: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ 
case 3: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ case 2: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ - case 1: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++) \ - obj[i] = ring[idx]; \ - for (idx = 0; i < n; i++, idx++) \ - obj[i] = ring[idx]; \ - } \ -} while (0) - -#define DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n) do { \ - unsigned int i; \ - uint32_t idx = cons_head & (r)->mask; \ - const uint32_t size = (r)->size; \ - uint64_t *ring = (uint64_t *)ring_start; \ - uint64_t *obj = (uint64_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & (~(uint32_t)0x3)); i += 4, idx += 4) {\ - obj[i] = ring[idx]; \ - obj[i + 1] = ring[idx + 1]; \ - obj[i + 2] = ring[idx + 2]; \ - obj[i + 3] = ring[idx + 3]; \ - } \ - switch (n & 0x3) { \ - case 3: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ - case 2: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ - case 1: \ - obj[i++] = ring[idx++]; \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++) \ - obj[i] = ring[idx]; \ - for (idx = 0; i < n; i++, idx++) \ - obj[i] = ring[idx]; \ - } \ -} while (0) - -#define DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n) do { \ - unsigned int i; \ - uint32_t idx = cons_head & (r)->mask; \ - const uint32_t size = (r)->size; \ - __uint128_t *ring = (__uint128_t *)ring_start; \ - __uint128_t *obj = (__uint128_t *)obj_table; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n >> 1); i += 2, idx += 2) { \ - obj[i] = ring[idx]; \ - obj[i + 1] = ring[idx + 1]; \ - } \ - switch (n & 0x1) { \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ case 1: \ - obj[i++] = ring[idx++]; /* fallthrough */ \ + obj[i++] = ring[nr_idx++]; /* fallthrough */ \ } \ } else { \ - for (i = 0; idx < size; i++, idx++) \ - obj[i] = ring[idx]; \ - for (idx = 0; i < n; i++, idx++) \ - obj[i] = ring[idx]; \ + uint32_t nr_seg0 = seg0 * (esize / 
sizeof(uint32_t)); \ + uint32_t nr_seg1 = nr_n - nr_seg0; \ + for (i = 0; i < nr_seg0; i++, nr_idx++)\ + obj[i] = ring[nr_idx];\ + for (j = 0; j < nr_seg1; i++, j++) \ + obj[i] = ring[j]; \ } \ } while (0) @@ -373,7 +242,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table, if (n == 0) goto end; - ENQUEUE_PTRS_ELEM(r, &r[1], prod_head, obj_table, esize, n); + ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n); update_tail(&r->prod, prod_head, prod_next, is_sp, 1); end: @@ -420,7 +289,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, if (n == 0) goto end; - DEQUEUE_PTRS_ELEM(r, &r[1], cons_head, obj_table, esize, n); + DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n); update_tail(&r->cons, cons_head, cons_next, is_sc, 0); -- 2.17.1