* [PATCH 1/2] eventdev/dma: reorganize event DMA ops @ 2024-04-06 10:13 pbhagavatula 2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula ` (2 more replies) 0 siblings, 3 replies; 17+ messages in thread From: pbhagavatula @ 2024-04-06 10:13 UTC (permalink / raw) To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Re-organize event DMA ops structure to allow holding source and destination pointers without the need for additional memory, the mempool allocating memory for rte_event_dma_adapter_ops can size the structure to accommodate all the needed source and destination pointers. Add multiple words for holding user metadata, adapter implementation specific metadata and event metadata. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- app/test-eventdev/test_perf_common.c | 26 ++++-------- app/test/test_event_dma_adapter.c | 20 +++------ doc/guides/prog_guide/event_dma_adapter.rst | 2 +- drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ 6 files changed, 72 insertions(+), 88 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index 93e6132de8de..00fcac716304 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, prod = 0; for (; port < perf_nb_event_ports(opt); port++) { struct prod_data *p = &t->prod[port]; - struct rte_event *response_info; uint32_t flow_id; p->dev_id = opt->dev_id; @@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { rte_mempool_get(t->da_op_pool, (void **)&op); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - - op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->src_seg->length = 1024; - op->dst_seg->length = 1024; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[0].length = 1024; + op->src_dst_seg[1].length = 1024; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, op->dma_dev_id = dma_dev_id; op->vchan = vchan_id; - response_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - response_info->queue_id = p->queue_id; - response_info->sched_type = RTE_SCHED_TYPE_ATOMIC; - response_info->flow_id = flow_id; - p->da.dma_op[flow_id] = op; } @@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt) return -ENODEV; } - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); if (t->da_op_pool == NULL) { @@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt) for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { op = p->da.dma_op[flow_id]; - 
rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr); - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr); - rte_free(op->src_seg); - rte_free(op->dst_seg); + rte_pktmbuf_free((struct rte_mbuf *)op->src_dst_seg[0].addr); + rte_pktmbuf_free((struct rte_mbuf *)op->src_dst_seg[1].addr); rte_mempool_put(op->op_mp, op); } diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c index 35b417b69f7b..d9dff4ff7d3f 100644 --- a/app/test/test_event_dma_adapter.c +++ b/app/test/test_event_dma_adapter.c @@ -235,7 +235,6 @@ test_op_forward_mode(void) struct rte_mbuf *dst_mbuf[TEST_MAX_OP]; struct rte_event_dma_adapter_op *op; struct rte_event ev[TEST_MAX_OP]; - struct rte_event response_info; int ret, i; ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP); @@ -253,14 +252,11 @@ test_op_forward_mode(void) rte_mempool_get(params.op_mpool, (void **)&op); TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n"); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - /* Update Op */ - op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]); - op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]); - op->src_seg->length = PACKET_LENGTH; - op->dst_seg->length = PACKET_LENGTH; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]); + op->src_dst_seg[0].length = PACKET_LENGTH; + op->src_dst_seg[1].length = PACKET_LENGTH; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -268,10 +264,6 @@ test_op_forward_mode(void) op->dma_dev_id = TEST_DMA_DEV_ID; op->vchan = TEST_DMA_VCHAN_ID; - response_info.event = dma_response_info.event; - rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info, - sizeof(struct rte_event)); - /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev[i], 0, sizeof(struct rte_event)); ev[i].event = 0; @@ -294,8 +286,6 @@ test_op_forward_mode(void) TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n"); - rte_free(op->src_seg); - rte_free(op->dst_seg); rte_mempool_put(op->op_mp, op); } @@ -400,7 +390,7 @@ configure_dmadev(void) rte_socket_id()); RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n"); - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n"); diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst index 3443b6a8032e..1fb9b0a07b87 100644 --- a/doc/guides/prog_guide/event_dma_adapter.rst +++ b/doc/guides/prog_guide/event_dma_adapter.rst @@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu uint32_t cap; int ret; - /* Fill in event info and update event_ptr with rte_dma_op */ + /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev, 0, sizeof(ev)); . . 
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index f6562b603e45..9f7f9b2eed0e 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn10k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; hdr[1] = ((uint64_t)comp_ptr); - hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); + hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); - src = &op->src_seg[0]; - dst = &op->dst_seg[0]; + src = &op->src_dst_seg[0]; + dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && - (rsp_info->sched_type & DPI_HDR_TT_MASK) == - RTE_SCHED_TYPE_ORDERED)) + (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; @@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn9k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) * For all other cases, src pointers are first pointers. 
*/ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; - hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event); + hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event); - if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) + if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c index 24dff556dbfe..e52ef46a1b36 100644 --- a/lib/eventdev/rte_event_dma_adapter.c +++ b/lib/eventdev/rte_event_dma_adapter.c @@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, uint16_t vchan, uint16_t *nb_ops_flushed) { struct rte_event_dma_adapter_op *op; - struct dma_vchan_info *tq; uint16_t *head = &bufp->head; uint16_t *tail = &bufp->tail; + struct dma_vchan_info *tq; uint16_t n; uint16_t i; int ret; @@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, for (i = 0; i < n; i++) { op = bufp->op_buffer[*head]; if (op->nb_src == 1 && op->nb_dst == 1) - ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr, - op->src_seg->length, op->flags); + ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr, + op->src_dst_seg[1].addr, op->src_dst_seg[0].length, + op->flags); else - ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg, - op->nb_src, op->nb_dst, op->flags); + ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0], + &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst, + op->flags); if (ret < 0) break; @@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns if (dma_op == NULL) continue; - /* Expected to have response info appended to dma_op. */ - + dma_op->impl_opaque[0] = ev[i].event; dma_dev_id = dma_op->dma_dev_id; vchan = dma_op->vchan; vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan]; @@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a uint8_t event_port_id = adapter->event_port_id; uint8_t event_dev_id = adapter->eventdev_id; struct rte_event events[DMA_BATCH_SIZE]; - struct rte_event *response_info; uint16_t nb_enqueued, nb_ev; uint8_t retry; uint8_t i; @@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a for (i = 0; i < num; i++) { struct rte_event *ev = &events[nb_ev++]; - /* Expected to have response info appended to dma_op. 
*/ - response_info = (struct rte_event *)((uint8_t *)ops[i] + - sizeof(struct rte_event_dma_adapter_op)); - if (unlikely(response_info == NULL)) { - if (ops[i] != NULL && ops[i]->op_mp != NULL) - rte_mempool_put(ops[i]->op_mp, ops[i]); - continue; - } - - rte_memcpy(ev, response_info, sizeof(struct rte_event)); + ev->event = ops[i]->impl_opaque[0]; ev->event_ptr = ops[i]; ev->event_type = RTE_EVENT_TYPE_DMADEV; if (adapter->implicit_release_disabled) diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index e924ab673df7..048ddba3f354 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -157,24 +157,46 @@ extern "C" { * instance. */ struct rte_event_dma_adapter_op { - struct rte_dma_sge *src_seg; - /**< Source segments. */ - struct rte_dma_sge *dst_seg; - /**< Destination segments. */ - uint16_t nb_src; - /**< Number of source segments. */ - uint16_t nb_dst; - /**< Number of destination segments. */ uint64_t flags; /**< Flags related to the operation. * @see RTE_DMA_OP_FLAG_* */ - int16_t dma_dev_id; - /**< DMA device ID to be used */ - uint16_t vchan; - /**< DMA vchan ID to be used */ struct rte_mempool *op_mp; /**< Mempool from which op is allocated. */ + enum rte_dma_status_code status; + /**< Status code for this operation. */ + uint32_t rsvd; + /**< Reserved for future use. */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An dma device implementation use this field to hold + * implementation specific values to share between dequeue and enqueue + * operations. + * The application should not modify this field. + */ + uint64_t user_meta; + /**< Memory to store user specific metadata. + * The dma device implementation should not modify this area. + */ + uint64_t event_meta; + /**< Event metadata that defines event attributes when used in OP_NEW mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW + * @see struct rte_event::event + */ + int16_t dma_dev_id; + /**< DMA device ID to be used with OP_FORWARD mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t vchan; + /**< DMA vchan ID to be used with OP_FORWARD mode + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t nb_src; + /**< Number of source segments. */ + uint16_t nb_dst; + /**< Number of destination segments. */ + struct rte_dma_sge src_dst_seg[0]; + /**< Source and destination segments. */ }; /** -- 2.43.0 ^ permalink raw reply [flat|nested] 17+ messages in thread
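For readers following the structural change above: src_dst_seg[] is now a flexible array at the end of rte_event_dma_adapter_op, so the application sizes the op mempool element for the number of segments it needs and places sources first, destinations after nb_src. Below is a minimal sketch based on the test code in this patch; the pool size, the IOVAs (src_iova/dst_iova) and the device/vchan IDs are placeholders supplied by the application, not part of the patch.

#include <rte_common.h>
#include <rte_dmadev.h>
#include <rte_event_dma_adapter.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

#define NB_SGE 2	/* one source + one destination, as in the test above */

static struct rte_mempool *
create_dma_op_pool(unsigned int nb_ops)
{
	/* Element must hold the op plus the trailing segment array. */
	size_t elt_size = sizeof(struct rte_event_dma_adapter_op) +
			  NB_SGE * sizeof(struct rte_dma_sge);

	return rte_mempool_create("dma_op_pool", nb_ops, elt_size, 256, 0,
				  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
}

static struct rte_event_dma_adapter_op *
alloc_copy_op(struct rte_mempool *op_pool, rte_iova_t src_iova,
	      rte_iova_t dst_iova, int16_t dma_dev_id, uint16_t vchan_id)
{
	struct rte_event_dma_adapter_op *op;

	if (rte_mempool_get(op_pool, (void **)&op) < 0)
		return NULL;

	op->src_dst_seg[0].addr = src_iova;	/* sources come first...        */
	op->src_dst_seg[0].length = 1024;
	op->src_dst_seg[1].addr = dst_iova;	/* ...destinations after nb_src */
	op->src_dst_seg[1].length = 1024;
	op->nb_src = 1;
	op->nb_dst = 1;
	op->flags = RTE_DMA_OP_FLAG_SUBMIT;
	op->op_mp = op_pool;
	op->dma_dev_id = dma_dev_id;		/* consumed in OP_FORWARD mode  */
	op->vchan = vchan_id;
	return op;
}

In OP_FORWARD mode the completion event attributes now come from the enqueued event itself (held in impl_opaque[0] by the adapter); only OP_NEW mode uses the new event_meta field.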
* [PATCH 2/2] dma/cnxk: remove completion pool 2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula @ 2024-04-06 10:13 ` pbhagavatula 2024-04-16 8:56 ` Vamsi Krishna Attunuru 2024-04-17 5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-05-16 7:36 ` [PATCH " Amit Prakash Shukla 2 siblings, 1 reply; 17+ messages in thread From: pbhagavatula @ 2024-04-06 10:13 UTC (permalink / raw) To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use DMA ops to store metadata, remove use of completion pool. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/dma/cnxk/cnxk_dmadev.c | 53 ++++++---------- drivers/dma/cnxk/cnxk_dmadev.h | 24 +------ drivers/dma/cnxk/cnxk_dmadev_fp.c | 79 +++++------------------- drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 +++----------- 4 files changed, 45 insertions(+), 158 deletions(-) diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2cd..dfd722271327 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,6 +2,8 @@ * Copyright (C) 2021 Marvell International Ltd. */ +#include <rte_event_dma_adapter.h> + #include <cnxk_dmadev.h> static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) { struct cnxk_dpi_conf *dpi_conf; uint16_t num_vchans; - uint16_t max_desc; - int i, j; + int i; if (vchan == RTE_DMA_ALL_VCHAN) { num_vchans = dpivf->num_vchans; @@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) for (; i < num_vchans; i++) { dpi_conf = &dpivf->conf[i]; - max_desc = dpi_conf->c_desc.max_cnt + 1; - if (dpi_conf->c_desc.compl_ptr) { - for (j = 0; j < max_desc; j++) - rte_free(dpi_conf->c_desc.compl_ptr[j]); - } - rte_free(dpi_conf->c_desc.compl_ptr); dpi_conf->c_desc.compl_ptr = NULL; } @@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, if (max_desc > CNXK_DPI_MAX_DESC) max_desc = CNXK_DPI_MAX_DESC; - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); + size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET); dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); if (dpi_conf->c_desc.compl_ptr == NULL) { @@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, return -ENOMEM; } - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } + for (i = 0; i < max_desc; i++) + dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; dpi_conf->c_desc.max_cnt = (max_desc - 1); @@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->pnum_words = 0; dpi_conf->pending = 0; dpi_conf->desc_idx = 0; - for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) { - if (dpi_conf->c_desc.compl_ptr[j]) - dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA; - } + for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) + dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; nb_desc += dpi_conf->c_desc.max_cnt + 1; cnxk_stats_reset(dev, i); dpi_conf->completed_offset = 0; @@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, struct 
cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t status; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - - if (comp_ptr->cdata) { - if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA) + status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; + if (status) { + if (status == CNXK_DPI_REQ_CDATA) break; *has_error = 1; dpi_conf->stats.errors++; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = + CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); break; } - - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - status[cnt] = comp_ptr->cdata; + status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; if (status[cnt]) { if (status[cnt] == CNXK_DPI_REQ_CDATA) break; dpi_conf->stats.errors++; } - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de rdpi = &dpivf->rdpi; rdpi->pci_dev = pci_dev; - rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs)); + rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque)); if (rc < 0) goto err_out_free; diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba217..a80db333a0a2 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -37,17 +37,12 @@ #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ CNXK_DPI_MAX_POINTER) #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1) - +#define CNXK_DPI_COMPL_OFFSET ROC_CACHE_LINE_SZ /* Set Completion data to 0xFF when request submitted, * upon successful request completion engine reset to completion status */ #define CNXK_DPI_REQ_CDATA 0xFF -/* Set Completion data to 0xDEADBEEF when request submitted for SSO. - * This helps differentiate if the dequeue is called after cnxk enueue. - */ -#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF - union cnxk_dpi_instr_cmd { uint64_t u; struct cn9k_dpi_instr_cmd { @@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd { } cn10k; }; -struct cnxk_dpi_compl_s { - uint64_t cdata; - void *op; - uint16_t dev_id; - uint16_t vchan; - uint32_t wqecs; -}; - struct cnxk_dpi_cdesc_data_s { - struct cnxk_dpi_compl_s **compl_ptr; uint16_t max_cnt; uint16_t head; uint16_t tail; -}; - -struct cnxk_dma_adapter_info { - bool enabled; /* Set if vchan queue is added to dma adapter. */ - struct rte_mempool *req_mp; /* DMA inflight request mempool. 
*/ + uint8_t *compl_ptr; }; struct cnxk_dpi_conf { @@ -119,7 +101,7 @@ struct cnxk_dpi_conf { uint16_t desc_idx; struct rte_dma_stats stats; uint64_t completed_offset; - struct cnxk_dma_adapter_info adapter_info; + bool adapter_enabled; }; struct cnxk_dpi_vf_s { diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index 9f7f9b2eed0e..38f4524439af 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = (1UL << 54) | (1UL << 48); @@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37); @@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; @@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge { struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; @@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *src, *dst; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn10k_sso_hws *work; @@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event 
ev[], uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; - hdr[1] = ((uint64_t)comp_ptr); + hdr[1] = (uint64_t)op; hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); src = &op->src_dst_seg[0]; @@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cn9k_sso_hws_dual *work; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; @@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn9k_sso_hws *work; @@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -682,38 +655,20 @@ uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1) { struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - rte_mcslock_t mcs_lock_me; - RTE_ATOMIC(uint8_t) *wqecs; - - comp_ptr = (struct cnxk_dpi_compl_s *)get_work1; - - /* Dequeue can be called without calling cnx_enqueue in case of - * dma_adapter. When its called from adapter, dma op will not be - * embedded in completion pointer. In those cases return op. 
- */ - if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA) - return (uintptr_t)comp_ptr; - dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private; - dpi_conf = &dpivf->conf[comp_ptr->vchan]; + op = (struct rte_event_dma_adapter_op *)get_work1; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpi_conf = &dpivf->conf[op->vchan]; - rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); - wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs; - if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0) - dpi_conf->stats.errors++; + if (rte_atomic_load_explicit(&op->impl_opaque[0], rte_memory_order_relaxed) != 0) + rte_atomic_fetch_add_explicit(&dpi_conf->stats.errors, 1, rte_memory_order_relaxed); /* Take into account errors also. This is similar to * cnxk_dmadev_completed_status(). */ - dpi_conf->stats.completed++; - rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me); - - op = (struct rte_event_dma_adapter_op *)comp_ptr->op; - - rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr); + rte_atomic_fetch_add_explicit(&dpi_conf->stats.completed, 1, rte_memory_order_relaxed); return (uintptr_t)op; } diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index a2a59b16c912..98db11ad61fa 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev, return 0; } -static int -dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan, - uint16_t vchan_id) -{ - char name[RTE_MEMPOOL_NAMESIZE]; - uint32_t cache_size, nb_req; - unsigned int req_size; - - snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id); - req_size = sizeof(struct cnxk_dpi_compl_s); - - nb_req = vchan->c_desc.max_cnt; - cache_size = 16; - nb_req += (cache_size * rte_lcore_count()); - - vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0, - NULL, NULL, NULL, NULL, rte_socket_id(), 0); - if (vchan->adapter_info.req_mp == NULL) - return -ENOMEM; - - vchan->adapter_info.enabled = true; - - return 0; -} - int cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, const int16_t dma_dev_id, uint16_t vchan_id) @@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, uint32_t adptr_xae_cnt = 0; struct cnxk_dpi_vf_s *dpivf; struct cnxk_dpi_conf *vchan; - int ret; dpivf = rte_dma_fp_objs[dma_dev_id].dev_private; if ((int16_t)vchan_id == -1) { @@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) { - cnxk_dma_adapter_vchan_del(dma_dev_id, -1); - return ret; - } - adptr_xae_cnt += vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt += vchan->c_desc.max_cnt; } } else { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) - return ret; - adptr_xae_cnt = vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt = vchan->c_desc.max_cnt; } /* Update dma adapter XAE count */ @@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, static int dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan) { - rte_mempool_free(vchan->adapter_info.req_mp); - vchan->adapter_info.enabled = false; + vchan->adapter_enabled = false; return 0; } @@ -823,12 +790,12 
@@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id) for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } } else { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } -- 2.43.0 ^ permalink raw reply [flat|nested] 17+ messages in thread
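The driver-side effect of this patch is easiest to see as a data-structure change: instead of a mempool of cnxk_dpi_compl_s objects, each vchan keeps a flat array with one completion-status byte per descriptor, spaced a cache line apart, and for adapter traffic the engine writes its completion directly into the rte_event_dma_adapter_op at the impl_opaque offset. The sketch below shows only the generic strided status-ring idea with illustrative names (compl_ring, COMPL_STRIDE); it is not the driver code itself.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <rte_malloc.h>

#define COMPL_STRIDE	64	/* one cache line per slot, like CNXK_DPI_COMPL_OFFSET */
#define REQ_CDATA	0xFF	/* written at submit time, overwritten on completion   */

struct compl_ring {
	uint8_t *status;	/* max_desc status bytes, one per cache line */
	uint16_t head;
	uint16_t max_cnt;	/* max_desc - 1, power-of-two ring mask      */
};

static int
compl_ring_init(struct compl_ring *r, uint16_t max_desc)
{
	uint16_t i;

	r->status = rte_zmalloc(NULL, (size_t)max_desc * COMPL_STRIDE, 0);
	if (r->status == NULL)
		return -ENOMEM;
	for (i = 0; i < max_desc; i++)
		r->status[i * COMPL_STRIDE] = REQ_CDATA;	/* arm every slot */
	r->head = 0;
	r->max_cnt = max_desc - 1;
	return 0;
}

/* Poll one completion: returns 1 if the head descriptor finished (*err set on
 * error), 0 if it is still in flight.
 */
static int
compl_ring_poll(struct compl_ring *r, bool *err)
{
	uint8_t st = r->status[r->head * COMPL_STRIDE];

	if (st == REQ_CDATA)
		return 0;				/* engine has not written back yet   */
	*err = (st != 0);				/* 0 = success, otherwise error code */
	r->status[r->head * COMPL_STRIDE] = REQ_CDATA;	/* re-arm the slot for reuse         */
	r->head = (r->head + 1) & r->max_cnt;
	return 1;
}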
* RE: [PATCH 2/2] dma/cnxk: remove completion pool
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-04-16  8:56   ` Vamsi Krishna Attunuru
  0 siblings, 0 replies; 17+ messages in thread
From: Vamsi Krishna Attunuru @ 2024-04-16  8:56 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Pavan Nikhilesh Bhagavatula,
	Shijith Thotton
  Cc: dev

> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Saturday, April 6, 2024 3:43 PM
> To: Jerin Jacob <jerinj@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>; Shijith Thotton <sthotton@marvell.com>
> Cc: dev@dpdk.org
> Subject: [PATCH 2/2] dma/cnxk: remove completion pool
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Use DMA ops to store metadata, remove use of completion pool.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---

Acked-by: Vamsi Attunuru <vattunuru@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread
* [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops 2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula @ 2024-04-17 5:58 ` pbhagavatula 2024-04-17 5:58 ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula 2024-04-17 8:26 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-05-16 7:36 ` [PATCH " Amit Prakash Shukla 2 siblings, 2 replies; 17+ messages in thread From: pbhagavatula @ 2024-04-17 5:58 UTC (permalink / raw) To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Re-organize event DMA ops structure to allow holding source and destination pointers without the need for additional memory, the mempool allocating memory for rte_event_dma_adapter_ops can size the structure to accommodate all the needed source and destination pointers. Add multiple words for holding user metadata, adapter implementation specific metadata and event metadata. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v2 Changes: - Fix 32bit compilation app/test-eventdev/test_perf_common.c | 26 ++++-------- app/test/test_event_dma_adapter.c | 20 +++------ doc/guides/prog_guide/event_dma_adapter.rst | 2 +- drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ 6 files changed, 72 insertions(+), 88 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index 93e6132de8..db0f9c1f3b 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, prod = 0; for (; port < perf_nb_event_ports(opt); port++) { struct prod_data *p = &t->prod[port]; - struct rte_event *response_info; uint32_t flow_id; p->dev_id = opt->dev_id; @@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { rte_mempool_get(t->da_op_pool, (void **)&op); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - - op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->src_seg->length = 1024; - op->dst_seg->length = 1024; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[0].length = 1024; + op->src_dst_seg[1].length = 1024; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, op->dma_dev_id = dma_dev_id; op->vchan = vchan_id; - response_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - response_info->queue_id = p->queue_id; - response_info->sched_type = RTE_SCHED_TYPE_ATOMIC; - response_info->flow_id = flow_id; - p->da.dma_op[flow_id] = op; } @@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt) return -ENODEV; } - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); t->da_op_pool = 
rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); if (t->da_op_pool == NULL) { @@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt) for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { op = p->da.dma_op[flow_id]; - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr); - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr); - rte_free(op->src_seg); - rte_free(op->dst_seg); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr); rte_mempool_put(op->op_mp, op); } diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c index 35b417b69f..d9dff4ff7d 100644 --- a/app/test/test_event_dma_adapter.c +++ b/app/test/test_event_dma_adapter.c @@ -235,7 +235,6 @@ test_op_forward_mode(void) struct rte_mbuf *dst_mbuf[TEST_MAX_OP]; struct rte_event_dma_adapter_op *op; struct rte_event ev[TEST_MAX_OP]; - struct rte_event response_info; int ret, i; ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP); @@ -253,14 +252,11 @@ test_op_forward_mode(void) rte_mempool_get(params.op_mpool, (void **)&op); TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n"); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - /* Update Op */ - op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]); - op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]); - op->src_seg->length = PACKET_LENGTH; - op->dst_seg->length = PACKET_LENGTH; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]); + op->src_dst_seg[0].length = PACKET_LENGTH; + op->src_dst_seg[1].length = PACKET_LENGTH; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -268,10 +264,6 @@ test_op_forward_mode(void) op->dma_dev_id = TEST_DMA_DEV_ID; op->vchan = TEST_DMA_VCHAN_ID; - response_info.event = dma_response_info.event; - rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info, - sizeof(struct rte_event)); - /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev[i], 0, sizeof(struct rte_event)); ev[i].event = 0; @@ -294,8 +286,6 @@ test_op_forward_mode(void) TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n"); - rte_free(op->src_seg); - rte_free(op->dst_seg); rte_mempool_put(op->op_mp, op); } @@ -400,7 +390,7 @@ configure_dmadev(void) rte_socket_id()); RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n"); - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n"); diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst index 3443b6a803..1fb9b0a07b 100644 --- a/doc/guides/prog_guide/event_dma_adapter.rst +++ b/doc/guides/prog_guide/event_dma_adapter.rst @@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu uint32_t cap; int ret; - /* Fill in event info and update event_ptr with rte_dma_op */ + /* Fill in event info and update event_ptr with 
rte_event_dma_adapter_op */ memset(&ev, 0, sizeof(ev)); . . diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index f6562b603e..9f7f9b2eed 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn10k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; hdr[1] = ((uint64_t)comp_ptr); - hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); + hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); - src = &op->src_seg[0]; - dst = &op->dst_seg[0]; + src = &op->src_dst_seg[0]; + dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && - (rsp_info->sched_type & DPI_HDR_TT_MASK) == - RTE_SCHED_TYPE_ORDERED)) + (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; @@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn9k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) * For all other cases, src pointers are first pointers. 
*/ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; - hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event); + hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event); - if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) + if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c index 24dff556db..e52ef46a1b 100644 --- a/lib/eventdev/rte_event_dma_adapter.c +++ b/lib/eventdev/rte_event_dma_adapter.c @@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, uint16_t vchan, uint16_t *nb_ops_flushed) { struct rte_event_dma_adapter_op *op; - struct dma_vchan_info *tq; uint16_t *head = &bufp->head; uint16_t *tail = &bufp->tail; + struct dma_vchan_info *tq; uint16_t n; uint16_t i; int ret; @@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, for (i = 0; i < n; i++) { op = bufp->op_buffer[*head]; if (op->nb_src == 1 && op->nb_dst == 1) - ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr, - op->src_seg->length, op->flags); + ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr, + op->src_dst_seg[1].addr, op->src_dst_seg[0].length, + op->flags); else - ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg, - op->nb_src, op->nb_dst, op->flags); + ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0], + &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst, + op->flags); if (ret < 0) break; @@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns if (dma_op == NULL) continue; - /* Expected to have response info appended to dma_op. */ - + dma_op->impl_opaque[0] = ev[i].event; dma_dev_id = dma_op->dma_dev_id; vchan = dma_op->vchan; vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan]; @@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a uint8_t event_port_id = adapter->event_port_id; uint8_t event_dev_id = adapter->eventdev_id; struct rte_event events[DMA_BATCH_SIZE]; - struct rte_event *response_info; uint16_t nb_enqueued, nb_ev; uint8_t retry; uint8_t i; @@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a for (i = 0; i < num; i++) { struct rte_event *ev = &events[nb_ev++]; - /* Expected to have response info appended to dma_op. 
*/ - response_info = (struct rte_event *)((uint8_t *)ops[i] + - sizeof(struct rte_event_dma_adapter_op)); - if (unlikely(response_info == NULL)) { - if (ops[i] != NULL && ops[i]->op_mp != NULL) - rte_mempool_put(ops[i]->op_mp, ops[i]); - continue; - } - - rte_memcpy(ev, response_info, sizeof(struct rte_event)); + ev->event = ops[i]->impl_opaque[0]; ev->event_ptr = ops[i]; ev->event_type = RTE_EVENT_TYPE_DMADEV; if (adapter->implicit_release_disabled) diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index e924ab673d..048ddba3f3 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -157,24 +157,46 @@ extern "C" { * instance. */ struct rte_event_dma_adapter_op { - struct rte_dma_sge *src_seg; - /**< Source segments. */ - struct rte_dma_sge *dst_seg; - /**< Destination segments. */ - uint16_t nb_src; - /**< Number of source segments. */ - uint16_t nb_dst; - /**< Number of destination segments. */ uint64_t flags; /**< Flags related to the operation. * @see RTE_DMA_OP_FLAG_* */ - int16_t dma_dev_id; - /**< DMA device ID to be used */ - uint16_t vchan; - /**< DMA vchan ID to be used */ struct rte_mempool *op_mp; /**< Mempool from which op is allocated. */ + enum rte_dma_status_code status; + /**< Status code for this operation. */ + uint32_t rsvd; + /**< Reserved for future use. */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An dma device implementation use this field to hold + * implementation specific values to share between dequeue and enqueue + * operations. + * The application should not modify this field. + */ + uint64_t user_meta; + /**< Memory to store user specific metadata. + * The dma device implementation should not modify this area. + */ + uint64_t event_meta; + /**< Event metadata that defines event attributes when used in OP_NEW mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW + * @see struct rte_event::event + */ + int16_t dma_dev_id; + /**< DMA device ID to be used with OP_FORWARD mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t vchan; + /**< DMA vchan ID to be used with OP_FORWARD mode + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t nb_src; + /**< Number of source segments. */ + uint16_t nb_dst; + /**< Number of destination segments. */ + struct rte_dma_sge src_dst_seg[0]; + /**< Source and destination segments. */ }; /** -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
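The only functional difference from v1 is the 32-bit build fix visible in perf_dmadev_destroy(): the segment address is an rte_iova_t (always 64 bits), so casting it straight to a pointer fails on 32-bit targets with "cast to pointer from integer of different size", and v2 restores the intermediate (uintptr_t) cast. A short illustration of the pattern follows; the helper name is illustrative only.

#include <stdint.h>
#include <rte_common.h>

/* Narrow a 64-bit IOVA to a pointer explicitly so 32-bit builds stay clean. */
static inline void *
iova_to_ptr(rte_iova_t addr)
{
	return (void *)(uintptr_t)addr;
}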
* [PATCH v2 2/2] dma/cnxk: remove completion pool 2024-04-17 5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula @ 2024-04-17 5:58 ` pbhagavatula 2024-04-17 8:26 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 1 sibling, 0 replies; 17+ messages in thread From: pbhagavatula @ 2024-04-17 5:58 UTC (permalink / raw) To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use DMA ops to store metadata, remove use of completion pool. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Vamsi Attunuru <vattunuru@marvell.com> --- drivers/dma/cnxk/cnxk_dmadev.c | 53 ++++++---------- drivers/dma/cnxk/cnxk_dmadev.h | 24 +------ drivers/dma/cnxk/cnxk_dmadev_fp.c | 79 +++++------------------- drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 +++----------- 4 files changed, 45 insertions(+), 158 deletions(-) diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..dfd7222713 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,6 +2,8 @@ * Copyright (C) 2021 Marvell International Ltd. */ +#include <rte_event_dma_adapter.h> + #include <cnxk_dmadev.h> static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) { struct cnxk_dpi_conf *dpi_conf; uint16_t num_vchans; - uint16_t max_desc; - int i, j; + int i; if (vchan == RTE_DMA_ALL_VCHAN) { num_vchans = dpivf->num_vchans; @@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) for (; i < num_vchans; i++) { dpi_conf = &dpivf->conf[i]; - max_desc = dpi_conf->c_desc.max_cnt + 1; - if (dpi_conf->c_desc.compl_ptr) { - for (j = 0; j < max_desc; j++) - rte_free(dpi_conf->c_desc.compl_ptr[j]); - } - rte_free(dpi_conf->c_desc.compl_ptr); dpi_conf->c_desc.compl_ptr = NULL; } @@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, if (max_desc > CNXK_DPI_MAX_DESC) max_desc = CNXK_DPI_MAX_DESC; - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); + size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET); dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); if (dpi_conf->c_desc.compl_ptr == NULL) { @@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, return -ENOMEM; } - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } + for (i = 0; i < max_desc; i++) + dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; dpi_conf->c_desc.max_cnt = (max_desc - 1); @@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->pnum_words = 0; dpi_conf->pending = 0; dpi_conf->desc_idx = 0; - for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) { - if (dpi_conf->c_desc.compl_ptr[j]) - dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA; - } + for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) + dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; nb_desc += dpi_conf->c_desc.max_cnt + 1; cnxk_stats_reset(dev, i); dpi_conf->completed_offset = 0; @@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, struct cnxk_dpi_vf_s *dpivf = dev_private; struct 
cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t status; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - - if (comp_ptr->cdata) { - if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA) + status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; + if (status) { + if (status == CNXK_DPI_REQ_CDATA) break; *has_error = 1; dpi_conf->stats.errors++; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = + CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); break; } - - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - status[cnt] = comp_ptr->cdata; + status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; if (status[cnt]) { if (status[cnt] == CNXK_DPI_REQ_CDATA) break; dpi_conf->stats.errors++; } - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de rdpi = &dpivf->rdpi; rdpi->pci_dev = pci_dev; - rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs)); + rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque)); if (rc < 0) goto err_out_free; diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..a80db333a0 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -37,17 +37,12 @@ #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ CNXK_DPI_MAX_POINTER) #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1) - +#define CNXK_DPI_COMPL_OFFSET ROC_CACHE_LINE_SZ /* Set Completion data to 0xFF when request submitted, * upon successful request completion engine reset to completion status */ #define CNXK_DPI_REQ_CDATA 0xFF -/* Set Completion data to 0xDEADBEEF when request submitted for SSO. - * This helps differentiate if the dequeue is called after cnxk enueue. - */ -#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF - union cnxk_dpi_instr_cmd { uint64_t u; struct cn9k_dpi_instr_cmd { @@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd { } cn10k; }; -struct cnxk_dpi_compl_s { - uint64_t cdata; - void *op; - uint16_t dev_id; - uint16_t vchan; - uint32_t wqecs; -}; - struct cnxk_dpi_cdesc_data_s { - struct cnxk_dpi_compl_s **compl_ptr; uint16_t max_cnt; uint16_t head; uint16_t tail; -}; - -struct cnxk_dma_adapter_info { - bool enabled; /* Set if vchan queue is added to dma adapter. */ - struct rte_mempool *req_mp; /* DMA inflight request mempool. 
*/ + uint8_t *compl_ptr; }; struct cnxk_dpi_conf { @@ -119,7 +101,7 @@ struct cnxk_dpi_conf { uint16_t desc_idx; struct rte_dma_stats stats; uint64_t completed_offset; - struct cnxk_dma_adapter_info adapter_info; + bool adapter_enabled; }; struct cnxk_dpi_vf_s { diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index 9f7f9b2eed..38f4524439 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = (1UL << 54) | (1UL << 48); @@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37); @@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; @@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge { struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; @@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *src, *dst; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn10k_sso_hws *work; @@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], 
uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; - hdr[1] = ((uint64_t)comp_ptr); + hdr[1] = (uint64_t)op; hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); src = &op->src_dst_seg[0]; @@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cn9k_sso_hws_dual *work; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; @@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn9k_sso_hws *work; @@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -682,38 +655,20 @@ uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1) { struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - rte_mcslock_t mcs_lock_me; - RTE_ATOMIC(uint8_t) *wqecs; - - comp_ptr = (struct cnxk_dpi_compl_s *)get_work1; - - /* Dequeue can be called without calling cnx_enqueue in case of - * dma_adapter. When its called from adapter, dma op will not be - * embedded in completion pointer. In those cases return op. 
- */ - if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA) - return (uintptr_t)comp_ptr; - dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private; - dpi_conf = &dpivf->conf[comp_ptr->vchan]; + op = (struct rte_event_dma_adapter_op *)get_work1; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpi_conf = &dpivf->conf[op->vchan]; - rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); - wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs; - if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0) - dpi_conf->stats.errors++; + if (rte_atomic_load_explicit(&op->impl_opaque[0], rte_memory_order_relaxed) != 0) + rte_atomic_fetch_add_explicit(&dpi_conf->stats.errors, 1, rte_memory_order_relaxed); /* Take into account errors also. This is similar to * cnxk_dmadev_completed_status(). */ - dpi_conf->stats.completed++; - rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me); - - op = (struct rte_event_dma_adapter_op *)comp_ptr->op; - - rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr); + rte_atomic_fetch_add_explicit(&dpi_conf->stats.completed, 1, rte_memory_order_relaxed); return (uintptr_t)op; } diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index a2a59b16c9..98db11ad61 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev, return 0; } -static int -dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan, - uint16_t vchan_id) -{ - char name[RTE_MEMPOOL_NAMESIZE]; - uint32_t cache_size, nb_req; - unsigned int req_size; - - snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id); - req_size = sizeof(struct cnxk_dpi_compl_s); - - nb_req = vchan->c_desc.max_cnt; - cache_size = 16; - nb_req += (cache_size * rte_lcore_count()); - - vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0, - NULL, NULL, NULL, NULL, rte_socket_id(), 0); - if (vchan->adapter_info.req_mp == NULL) - return -ENOMEM; - - vchan->adapter_info.enabled = true; - - return 0; -} - int cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, const int16_t dma_dev_id, uint16_t vchan_id) @@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, uint32_t adptr_xae_cnt = 0; struct cnxk_dpi_vf_s *dpivf; struct cnxk_dpi_conf *vchan; - int ret; dpivf = rte_dma_fp_objs[dma_dev_id].dev_private; if ((int16_t)vchan_id == -1) { @@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) { - cnxk_dma_adapter_vchan_del(dma_dev_id, -1); - return ret; - } - adptr_xae_cnt += vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt += vchan->c_desc.max_cnt; } } else { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) - return ret; - adptr_xae_cnt = vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt = vchan->c_desc.max_cnt; } /* Update dma adapter XAE count */ @@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, static int dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan) { - rte_mempool_free(vchan->adapter_info.req_mp); - vchan->adapter_info.enabled = false; + vchan->adapter_enabled = false; return 0; } @@ -823,12 +790,12 @@ 
cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id) for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } } else { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
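The hunks above replace the per-descriptor struct cnxk_dpi_compl_s allocations with one flat status-byte array walked at a fixed stride: each submission reserves a byte for the engine to write, and each completion poll reads it back. A minimal sketch of that indexing, assuming COMPL_OFFSET stands in for CNXK_DPI_COMPL_OFFSET (one cache line in the patch) and REQ_CDATA for CNXK_DPI_REQ_CDATA; the ring corresponds to struct cnxk_dpi_cdesc_data_s and is not the real driver code:

    #include <stdint.h>

    #define COMPL_OFFSET 128  /* assumed cache-line stride */
    #define REQ_CDATA    0xFF /* "request in flight" sentinel */

    struct compl_ring {
            uint16_t max_cnt;   /* ring size - 1, used as wrap mask */
            uint16_t head;
            uint16_t tail;
            uint8_t *compl_ptr; /* (max_cnt + 1) status bytes, COMPL_OFFSET apart */
    };

    /* Submit side: hand the engine the status byte for this descriptor. */
    static inline uint8_t *
    ring_reserve_status(struct compl_ring *r)
    {
            uint8_t *slot = &r->compl_ptr[r->tail * COMPL_OFFSET];

            r->tail = (r->tail + 1) & r->max_cnt;
            return slot; /* hardware writes the completion code here */
    }

    /* Poll side: REQ_CDATA means the oldest descriptor is still pending. */
    static inline int
    ring_pop_status(struct compl_ring *r, uint8_t *status)
    {
            *status = r->compl_ptr[r->head * COMPL_OFFSET];
            if (*status == REQ_CDATA)
                    return 0;
            r->compl_ptr[r->head * COMPL_OFFSET] = REQ_CDATA; /* re-arm the slot */
            r->head = (r->head + 1) & r->max_cnt;
            return 1;
    }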
* [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops 2024-04-17 5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-04-17 5:58 ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula @ 2024-04-17 8:26 ` pbhagavatula 2024-04-17 8:26 ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula ` (2 more replies) 1 sibling, 3 replies; 17+ messages in thread From: pbhagavatula @ 2024-04-17 8:26 UTC (permalink / raw) To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Re-organize event DMA ops structure to allow holding source and destination pointers without the need for additional memory, the mempool allocating memory for rte_event_dma_adapter_ops can size the structure to accommodate all the needed source and destination pointers. Add multiple words for holding user metadata, adapter implementation specific metadata and event metadata. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v3 Changes: - Fix stdatomic compilation. v2 Changes: - Fix 32bit compilation app/test-eventdev/test_perf_common.c | 26 ++++-------- app/test/test_event_dma_adapter.c | 20 +++------ doc/guides/prog_guide/event_dma_adapter.rst | 2 +- drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ 6 files changed, 72 insertions(+), 88 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index 93e6132de8..db0f9c1f3b 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, prod = 0; for (; port < perf_nb_event_ports(opt); port++) { struct prod_data *p = &t->prod[port]; - struct rte_event *response_info; uint32_t flow_id; p->dev_id = opt->dev_id; @@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { rte_mempool_get(t->da_op_pool, (void **)&op); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - - op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->src_seg->length = 1024; - op->dst_seg->length = 1024; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[0].length = 1024; + op->src_dst_seg[1].length = 1024; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, op->dma_dev_id = dma_dev_id; op->vchan = vchan_id; - response_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - response_info->queue_id = p->queue_id; - response_info->sched_type = RTE_SCHED_TYPE_ATOMIC; - response_info->flow_id = flow_id; - p->da.dma_op[flow_id] = op; } @@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt) return -ENODEV; } - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256, 0, NULL, NULL, NULL, NULL, 
rte_socket_id(), 0); if (t->da_op_pool == NULL) { @@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt) for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { op = p->da.dma_op[flow_id]; - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr); - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr); - rte_free(op->src_seg); - rte_free(op->dst_seg); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr); rte_mempool_put(op->op_mp, op); } diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c index 35b417b69f..d9dff4ff7d 100644 --- a/app/test/test_event_dma_adapter.c +++ b/app/test/test_event_dma_adapter.c @@ -235,7 +235,6 @@ test_op_forward_mode(void) struct rte_mbuf *dst_mbuf[TEST_MAX_OP]; struct rte_event_dma_adapter_op *op; struct rte_event ev[TEST_MAX_OP]; - struct rte_event response_info; int ret, i; ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP); @@ -253,14 +252,11 @@ test_op_forward_mode(void) rte_mempool_get(params.op_mpool, (void **)&op); TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n"); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - /* Update Op */ - op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]); - op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]); - op->src_seg->length = PACKET_LENGTH; - op->dst_seg->length = PACKET_LENGTH; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]); + op->src_dst_seg[0].length = PACKET_LENGTH; + op->src_dst_seg[1].length = PACKET_LENGTH; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -268,10 +264,6 @@ test_op_forward_mode(void) op->dma_dev_id = TEST_DMA_DEV_ID; op->vchan = TEST_DMA_VCHAN_ID; - response_info.event = dma_response_info.event; - rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info, - sizeof(struct rte_event)); - /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev[i], 0, sizeof(struct rte_event)); ev[i].event = 0; @@ -294,8 +286,6 @@ test_op_forward_mode(void) TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n"); - rte_free(op->src_seg); - rte_free(op->dst_seg); rte_mempool_put(op->op_mp, op); } @@ -400,7 +390,7 @@ configure_dmadev(void) rte_socket_id()); RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n"); - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n"); diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst index 3443b6a803..1fb9b0a07b 100644 --- a/doc/guides/prog_guide/event_dma_adapter.rst +++ b/doc/guides/prog_guide/event_dma_adapter.rst @@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu uint32_t cap; int ret; - /* Fill in event info and update event_ptr with rte_dma_op */ + /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev, 0, sizeof(ev)); . . 
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index f6562b603e..9f7f9b2eed 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn10k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; hdr[1] = ((uint64_t)comp_ptr); - hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); + hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); - src = &op->src_seg[0]; - dst = &op->dst_seg[0]; + src = &op->src_dst_seg[0]; + dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && - (rsp_info->sched_type & DPI_HDR_TT_MASK) == - RTE_SCHED_TYPE_ORDERED)) + (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; @@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn9k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) @@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) * For all other cases, src pointers are first pointers. 
*/ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; - hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event); + hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event); - if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) + if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c index 24dff556db..e52ef46a1b 100644 --- a/lib/eventdev/rte_event_dma_adapter.c +++ b/lib/eventdev/rte_event_dma_adapter.c @@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, uint16_t vchan, uint16_t *nb_ops_flushed) { struct rte_event_dma_adapter_op *op; - struct dma_vchan_info *tq; uint16_t *head = &bufp->head; uint16_t *tail = &bufp->tail; + struct dma_vchan_info *tq; uint16_t n; uint16_t i; int ret; @@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, for (i = 0; i < n; i++) { op = bufp->op_buffer[*head]; if (op->nb_src == 1 && op->nb_dst == 1) - ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr, - op->src_seg->length, op->flags); + ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr, + op->src_dst_seg[1].addr, op->src_dst_seg[0].length, + op->flags); else - ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg, - op->nb_src, op->nb_dst, op->flags); + ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0], + &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst, + op->flags); if (ret < 0) break; @@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns if (dma_op == NULL) continue; - /* Expected to have response info appended to dma_op. */ - + dma_op->impl_opaque[0] = ev[i].event; dma_dev_id = dma_op->dma_dev_id; vchan = dma_op->vchan; vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan]; @@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a uint8_t event_port_id = adapter->event_port_id; uint8_t event_dev_id = adapter->eventdev_id; struct rte_event events[DMA_BATCH_SIZE]; - struct rte_event *response_info; uint16_t nb_enqueued, nb_ev; uint8_t retry; uint8_t i; @@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a for (i = 0; i < num; i++) { struct rte_event *ev = &events[nb_ev++]; - /* Expected to have response info appended to dma_op. 
*/ - response_info = (struct rte_event *)((uint8_t *)ops[i] + - sizeof(struct rte_event_dma_adapter_op)); - if (unlikely(response_info == NULL)) { - if (ops[i] != NULL && ops[i]->op_mp != NULL) - rte_mempool_put(ops[i]->op_mp, ops[i]); - continue; - } - - rte_memcpy(ev, response_info, sizeof(struct rte_event)); + ev->event = ops[i]->impl_opaque[0]; ev->event_ptr = ops[i]; ev->event_type = RTE_EVENT_TYPE_DMADEV; if (adapter->implicit_release_disabled) diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index e924ab673d..048ddba3f3 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -157,24 +157,46 @@ extern "C" { * instance. */ struct rte_event_dma_adapter_op { - struct rte_dma_sge *src_seg; - /**< Source segments. */ - struct rte_dma_sge *dst_seg; - /**< Destination segments. */ - uint16_t nb_src; - /**< Number of source segments. */ - uint16_t nb_dst; - /**< Number of destination segments. */ uint64_t flags; /**< Flags related to the operation. * @see RTE_DMA_OP_FLAG_* */ - int16_t dma_dev_id; - /**< DMA device ID to be used */ - uint16_t vchan; - /**< DMA vchan ID to be used */ struct rte_mempool *op_mp; /**< Mempool from which op is allocated. */ + enum rte_dma_status_code status; + /**< Status code for this operation. */ + uint32_t rsvd; + /**< Reserved for future use. */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An dma device implementation use this field to hold + * implementation specific values to share between dequeue and enqueue + * operations. + * The application should not modify this field. + */ + uint64_t user_meta; + /**< Memory to store user specific metadata. + * The dma device implementation should not modify this area. + */ + uint64_t event_meta; + /**< Event metadata that defines event attributes when used in OP_NEW mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW + * @see struct rte_event::event + */ + int16_t dma_dev_id; + /**< DMA device ID to be used with OP_FORWARD mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t vchan; + /**< DMA vchan ID to be used with OP_FORWARD mode + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t nb_src; + /**< Number of source segments. */ + uint16_t nb_dst; + /**< Number of destination segments. */ + struct rte_dma_sge src_dst_seg[0]; + /**< Source and destination segments. */ }; /** -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
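To make the 1/2 change concrete: with src_dst_seg[] now trailing the op, an application sizes the mempool element for its worst-case segment count and fills sources first, then destinations, exactly as the test code above does. A hedged sketch follows; OP_MAX_SEGS, the pool size and the helper names are illustrative, not part of the API:

    #include <rte_dmadev.h>
    #include <rte_event_dma_adapter.h>
    #include <rte_lcore.h>
    #include <rte_mempool.h>

    #define OP_MAX_SEGS 2 /* 1 source + 1 destination in this example */

    static struct rte_mempool *
    create_dma_op_pool(unsigned int nb_ops)
    {
            /* The op carries its rte_dma_sge array inline, so the element is
             * sized for the worst case up front - no per-op rte_malloc().
             */
            unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
                                    sizeof(struct rte_dma_sge) * OP_MAX_SEGS;

            return rte_mempool_create("dma_op_pool", nb_ops, elt_size, 256, 0,
                                      NULL, NULL, NULL, NULL, rte_socket_id(), 0);
    }

    static int
    prep_copy_op(struct rte_mempool *op_pool, int16_t dma_dev_id, uint16_t vchan,
                 rte_iova_t src, rte_iova_t dst, uint32_t len)
    {
            struct rte_event_dma_adapter_op *op;
            int ret;

            ret = rte_mempool_get(op_pool, (void **)&op);
            if (ret < 0)
                    return ret;

            /* Sources occupy src_dst_seg[0..nb_src-1]; destinations follow at
             * src_dst_seg[nb_src].
             */
            op->src_dst_seg[0].addr = src;
            op->src_dst_seg[0].length = len;
            op->src_dst_seg[1].addr = dst;
            op->src_dst_seg[1].length = len;
            op->nb_src = 1;
            op->nb_dst = 1;
            op->flags = RTE_DMA_OP_FLAG_SUBMIT;
            op->op_mp = op_pool;
            op->dma_dev_id = dma_dev_id;
            op->vchan = vchan;
            /* The op is then placed in ev.event_ptr and enqueued towards the
             * adapter, as in the forward-mode test above.
             */
            return 0;
    }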
* [PATCH v3 2/2] dma/cnxk: remove completion pool 2024-04-17 8:26 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula @ 2024-04-17 8:26 ` pbhagavatula 2024-05-16 7:39 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla 2024-05-30 12:44 ` [PATCH v4 " pbhagavatula 2 siblings, 0 replies; 17+ messages in thread From: pbhagavatula @ 2024-04-17 8:26 UTC (permalink / raw) To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use DMA ops to store metadata, remove use of completion pool. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Vamsi Attunuru <vattunuru@marvell.com> --- drivers/dma/cnxk/cnxk_dmadev.c | 53 ++++++--------- drivers/dma/cnxk/cnxk_dmadev.h | 24 +------ drivers/dma/cnxk/cnxk_dmadev_fp.c | 82 ++++++------------------ drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 ++------------ 4 files changed, 48 insertions(+), 158 deletions(-) diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..dfd7222713 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,6 +2,8 @@ * Copyright (C) 2021 Marvell International Ltd. */ +#include <rte_event_dma_adapter.h> + #include <cnxk_dmadev.h> static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) { struct cnxk_dpi_conf *dpi_conf; uint16_t num_vchans; - uint16_t max_desc; - int i, j; + int i; if (vchan == RTE_DMA_ALL_VCHAN) { num_vchans = dpivf->num_vchans; @@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) for (; i < num_vchans; i++) { dpi_conf = &dpivf->conf[i]; - max_desc = dpi_conf->c_desc.max_cnt + 1; - if (dpi_conf->c_desc.compl_ptr) { - for (j = 0; j < max_desc; j++) - rte_free(dpi_conf->c_desc.compl_ptr[j]); - } - rte_free(dpi_conf->c_desc.compl_ptr); dpi_conf->c_desc.compl_ptr = NULL; } @@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, if (max_desc > CNXK_DPI_MAX_DESC) max_desc = CNXK_DPI_MAX_DESC; - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); + size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET); dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); if (dpi_conf->c_desc.compl_ptr == NULL) { @@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, return -ENOMEM; } - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } + for (i = 0; i < max_desc; i++) + dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; dpi_conf->c_desc.max_cnt = (max_desc - 1); @@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->pnum_words = 0; dpi_conf->pending = 0; dpi_conf->desc_idx = 0; - for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) { - if (dpi_conf->c_desc.compl_ptr[j]) - dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA; - } + for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) + dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; nb_desc += dpi_conf->c_desc.max_cnt + 1; cnxk_stats_reset(dev, i); dpi_conf->completed_offset = 0; @@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, 
struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t status; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - - if (comp_ptr->cdata) { - if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA) + status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; + if (status) { + if (status == CNXK_DPI_REQ_CDATA) break; *has_error = 1; dpi_conf->stats.errors++; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = + CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); break; } - - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - status[cnt] = comp_ptr->cdata; + status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; if (status[cnt]) { if (status[cnt] == CNXK_DPI_REQ_CDATA) break; dpi_conf->stats.errors++; } - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de rdpi = &dpivf->rdpi; rdpi->pci_dev = pci_dev; - rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs)); + rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque)); if (rc < 0) goto err_out_free; diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..a80db333a0 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -37,17 +37,12 @@ #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ CNXK_DPI_MAX_POINTER) #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1) - +#define CNXK_DPI_COMPL_OFFSET ROC_CACHE_LINE_SZ /* Set Completion data to 0xFF when request submitted, * upon successful request completion engine reset to completion status */ #define CNXK_DPI_REQ_CDATA 0xFF -/* Set Completion data to 0xDEADBEEF when request submitted for SSO. - * This helps differentiate if the dequeue is called after cnxk enueue. - */ -#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF - union cnxk_dpi_instr_cmd { uint64_t u; struct cn9k_dpi_instr_cmd { @@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd { } cn10k; }; -struct cnxk_dpi_compl_s { - uint64_t cdata; - void *op; - uint16_t dev_id; - uint16_t vchan; - uint32_t wqecs; -}; - struct cnxk_dpi_cdesc_data_s { - struct cnxk_dpi_compl_s **compl_ptr; uint16_t max_cnt; uint16_t head; uint16_t tail; -}; - -struct cnxk_dma_adapter_info { - bool enabled; /* Set if vchan queue is added to dma adapter. */ - struct rte_mempool *req_mp; /* DMA inflight request mempool. 
*/ + uint8_t *compl_ptr; }; struct cnxk_dpi_conf { @@ -119,7 +101,7 @@ struct cnxk_dpi_conf { uint16_t desc_idx; struct rte_dma_stats stats; uint64_t completed_offset; - struct cnxk_dma_adapter_info adapter_info; + bool adapter_enabled; }; struct cnxk_dpi_vf_s { diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index 9f7f9b2eed..26591235c6 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = (1UL << 54) | (1UL << 48); @@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37); @@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; @@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge { struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; @@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *src, *dst; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn10k_sso_hws *work; @@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], 
uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; - hdr[1] = ((uint64_t)comp_ptr); + hdr[1] = (uint64_t)op; hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); src = &op->src_dst_seg[0]; @@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cn9k_sso_hws_dual *work; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; @@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; struct cn9k_sso_hws *work; @@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -682,38 +655,23 @@ uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1) { struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - rte_mcslock_t mcs_lock_me; - RTE_ATOMIC(uint8_t) *wqecs; - - comp_ptr = (struct cnxk_dpi_compl_s *)get_work1; - - /* Dequeue can be called without calling cnx_enqueue in case of - * dma_adapter. When its called from adapter, dma op will not be - * embedded in completion pointer. In those cases return op. 
- */ - if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA) - return (uintptr_t)comp_ptr; - dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private; - dpi_conf = &dpivf->conf[comp_ptr->vchan]; + op = (struct rte_event_dma_adapter_op *)get_work1; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpi_conf = &dpivf->conf[op->vchan]; - rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); - wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs; - if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0) - dpi_conf->stats.errors++; + if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0], + rte_memory_order_relaxed) != 0) + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1, + rte_memory_order_relaxed); /* Take into account errors also. This is similar to * cnxk_dmadev_completed_status(). */ - dpi_conf->stats.completed++; - rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me); - - op = (struct rte_event_dma_adapter_op *)comp_ptr->op; - - rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr); + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1, + rte_memory_order_relaxed); return (uintptr_t)op; } diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index a2a59b16c9..98db11ad61 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev, return 0; } -static int -dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan, - uint16_t vchan_id) -{ - char name[RTE_MEMPOOL_NAMESIZE]; - uint32_t cache_size, nb_req; - unsigned int req_size; - - snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id); - req_size = sizeof(struct cnxk_dpi_compl_s); - - nb_req = vchan->c_desc.max_cnt; - cache_size = 16; - nb_req += (cache_size * rte_lcore_count()); - - vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0, - NULL, NULL, NULL, NULL, rte_socket_id(), 0); - if (vchan->adapter_info.req_mp == NULL) - return -ENOMEM; - - vchan->adapter_info.enabled = true; - - return 0; -} - int cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, const int16_t dma_dev_id, uint16_t vchan_id) @@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, uint32_t adptr_xae_cnt = 0; struct cnxk_dpi_vf_s *dpivf; struct cnxk_dpi_conf *vchan; - int ret; dpivf = rte_dma_fp_objs[dma_dev_id].dev_private; if ((int16_t)vchan_id == -1) { @@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) { - cnxk_dma_adapter_vchan_del(dma_dev_id, -1); - return ret; - } - adptr_xae_cnt += vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt += vchan->c_desc.max_cnt; } } else { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) - return ret; - adptr_xae_cnt = vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt = vchan->c_desc.max_cnt; } /* Update dma adapter XAE count */ @@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, static int dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan) { - rte_mempool_free(vchan->adapter_info.req_mp); - vchan->adapter_info.enabled 
= false; + vchan->adapter_enabled = false; return 0; } @@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id) for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } } else { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
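With the pool gone, the fast-path consequence shows up in cnxk_dma_adapter_dequeue() above: the SSO work word is already the rte_event_dma_adapter_op, and the DPI engine writes its completion code at the impl_opaque offset programmed via roc_dpi_dev_init() in this patch. A condensed, non-authoritative sketch of that path; looking up the per-vchan stats from op->dma_dev_id/op->vchan is assumed and elided:

    #include <rte_dmadev.h>
    #include <rte_event_dma_adapter.h>
    #include <rte_stdatomic.h>

    static uintptr_t
    dma_adapter_dequeue_sketch(uintptr_t get_work1, struct rte_dma_stats *stats)
    {
            struct rte_event_dma_adapter_op *op;

            /* hdr[1]/hdr[2] carried the op pointer at enqueue time, so the
             * work word is already the op - nothing to look up or free.
             */
            op = (struct rte_event_dma_adapter_op *)get_work1;

            /* DPI writes its completion code at offsetof(op, impl_opaque);
             * non-zero means the transfer failed.
             */
            if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0],
                                         rte_memory_order_relaxed) != 0)
                    rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&stats->errors,
                                                  1, rte_memory_order_relaxed);

            /* Errors are accounted in completed as well, mirroring
             * cnxk_dmadev_completed_status().
             */
            rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&stats->completed,
                                          1, rte_memory_order_relaxed);

            return (uintptr_t)op;
    }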
* RE: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops 2024-04-17 8:26 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-04-17 8:26 ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula @ 2024-05-16 7:39 ` Amit Prakash Shukla 2024-05-30 12:23 ` Jerin Jacob 2024-05-30 12:44 ` [PATCH v4 " pbhagavatula 2 siblings, 1 reply; 17+ messages in thread From: Amit Prakash Shukla @ 2024-05-16 7:39 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru Cc: dev, Pavan Nikhilesh Bhagavatula > -----Original Message----- > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com> > Sent: Wednesday, April 17, 2024 1:57 PM > To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla > <amitprakashs@marvell.com>; Vamsi Krishna Attunuru > <vattunuru@marvell.com> > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula > <pbhagavatula@marvell.com> > Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Re-organize event DMA ops structure to allow holding source and destination > pointers without the need for additional memory, the mempool allocating > memory for rte_event_dma_adapter_ops can size the structure to > accommodate all the needed source and destination pointers. > > Add multiple words for holding user metadata, adapter implementation > specific metadata and event metadata. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > v3 Changes: > - Fix stdatomic compilation. > v2 Changes: > - Fix 32bit compilation > > app/test-eventdev/test_perf_common.c | 26 ++++-------- > app/test/test_event_dma_adapter.c | 20 +++------ > doc/guides/prog_guide/event_dma_adapter.rst | 2 +- > drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- > lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- > lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ > 6 files changed, 72 insertions(+), 88 deletions(-) > Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops 2024-05-16 7:39 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla @ 2024-05-30 12:23 ` Jerin Jacob 2024-05-30 17:35 ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula 0 siblings, 1 reply; 17+ messages in thread From: Jerin Jacob @ 2024-05-30 12:23 UTC (permalink / raw) To: Amit Prakash Shukla Cc: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru, dev On Thu, May 16, 2024 at 1:09 PM Amit Prakash Shukla <amitprakashs@marvell.com> wrote: > > > -----Original Message----- > > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com> > > Sent: Wednesday, April 17, 2024 1:57 PM > > To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla > > <amitprakashs@marvell.com>; Vamsi Krishna Attunuru > > <vattunuru@marvell.com> > > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula > > <pbhagavatula@marvell.com> > > Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops > > > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > > > Re-organize event DMA ops structure to allow holding source and destination > > pointers without the need for additional memory, the mempool allocating > > memory for rte_event_dma_adapter_ops can size the structure to > > accommodate all the needed source and destination pointers. > > > > Add multiple words for holding user metadata, adapter implementation > > specific metadata and event metadata. > > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > > --- > > v3 Changes: > > - Fix stdatomic compilation. > > v2 Changes: > > - Fix 32bit compilation > > > > app/test-eventdev/test_perf_common.c | 26 ++++-------- > > app/test/test_event_dma_adapter.c | 20 +++------ > > doc/guides/prog_guide/event_dma_adapter.rst | 2 +- > > drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- Spotted driver change in library patch. Please split. > > lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- > > lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ > > 6 files changed, 72 insertions(+), 88 deletions(-) > > > > Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> ^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [EXTERNAL] Re: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops 2024-05-30 12:23 ` Jerin Jacob @ 2024-05-30 17:35 ` Pavan Nikhilesh Bhagavatula 0 siblings, 0 replies; 17+ messages in thread From: Pavan Nikhilesh Bhagavatula @ 2024-05-30 17:35 UTC (permalink / raw) To: Jerin Jacob, Amit Prakash Shukla; +Cc: Jerin Jacob, Vamsi Krishna Attunuru, dev > On Thu, May 16, 2024 at 1:09 PM Amit Prakash Shukla > <amitprakashs@marvell.com> wrote: > > > > > -----Original Message----- > > > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com> > > > Sent: Wednesday, April 17, 2024 1:57 PM > > > To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla > > > <amitprakashs@marvell.com>; Vamsi Krishna Attunuru > > > <vattunuru@marvell.com> > > > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula > > > <pbhagavatula@marvell.com> > > > Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops > > > > > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > > > > > Re-organize event DMA ops structure to allow holding source and > destination > > > pointers without the need for additional memory, the mempool allocating > > > memory for rte_event_dma_adapter_ops can size the structure to > > > accommodate all the needed source and destination pointers. > > > > > > Add multiple words for holding user metadata, adapter implementation > > > specific metadata and event metadata. > > > > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > > > --- > > > v3 Changes: > > > - Fix stdatomic compilation. > > > v2 Changes: > > > - Fix 32bit compilation > > > > > > app/test-eventdev/test_perf_common.c | 26 ++++-------- > > > app/test/test_event_dma_adapter.c | 20 +++------ > > > doc/guides/prog_guide/event_dma_adapter.rst | 2 +- > > > drivers/dma/cnxk/cnxk_dmadev_fp.c | 39 +++++++---------- > > Spotted driver change in library patch. Please split. Since this change modifies the fastpath structure rte_event_dma_adapter_op, some driver changes are required, I have moved the non-relevant changes to 2/2. > > > > lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- > > > lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ > > > 6 files changed, 72 insertions(+), 88 deletions(-) > > > > > > > Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> ^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops 2024-04-17 8:26 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula 2024-04-17 8:26 ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula 2024-05-16 7:39 ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla @ 2024-05-30 12:44 ` pbhagavatula 2024-05-30 12:44 ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula ` (2 more replies) 2 siblings, 3 replies; 17+ messages in thread From: pbhagavatula @ 2024-05-30 12:44 UTC (permalink / raw) To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Re-organize event DMA ops structure to allow holding source and destination pointers without the need for additional memory, the mempool allocating memory for rte_event_dma_adapter_ops can size the structure to accommodate all the needed source and destination pointers. Add multiple words for holding user metadata, adapter implementation specific metadata and event metadata. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> --- v4 Changes: - Reduce unreleated driver changes and move to 2/2. v3 Changes: - Fix stdatomic compilation. v2 Changes: - Fix 32bit compilation app/test-eventdev/test_perf_common.c | 26 ++++-------- app/test/test_event_dma_adapter.c | 20 +++------ doc/guides/prog_guide/event_dma_adapter.rst | 2 +- drivers/dma/cnxk/cnxk_dmadev_fp.c | 20 ++++----- lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ 6 files changed, 66 insertions(+), 75 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index 93e6132de8..db0f9c1f3b 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, prod = 0; for (; port < perf_nb_event_ports(opt); port++) { struct prod_data *p = &t->prod[port]; - struct rte_event *response_info; uint32_t flow_id; p->dev_id = opt->dev_id; @@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { rte_mempool_get(t->da_op_pool, (void **)&op); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - - op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->src_seg->length = 1024; - op->dst_seg->length = 1024; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[0].length = 1024; + op->src_dst_seg[1].length = 1024; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, op->dma_dev_id = dma_dev_id; op->vchan = vchan_id; - response_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - response_info->queue_id = p->queue_id; - response_info->sched_type = RTE_SCHED_TYPE_ATOMIC; - response_info->flow_id = flow_id; - p->da.dma_op[flow_id] = op; } @@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt) return -ENODEV; } - elt_size = sizeof(struct rte_event_dma_adapter_op) + 
sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); if (t->da_op_pool == NULL) { @@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt) for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { op = p->da.dma_op[flow_id]; - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr); - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr); - rte_free(op->src_seg); - rte_free(op->dst_seg); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr); rte_mempool_put(op->op_mp, op); } diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c index 35b417b69f..d9dff4ff7d 100644 --- a/app/test/test_event_dma_adapter.c +++ b/app/test/test_event_dma_adapter.c @@ -235,7 +235,6 @@ test_op_forward_mode(void) struct rte_mbuf *dst_mbuf[TEST_MAX_OP]; struct rte_event_dma_adapter_op *op; struct rte_event ev[TEST_MAX_OP]; - struct rte_event response_info; int ret, i; ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP); @@ -253,14 +252,11 @@ test_op_forward_mode(void) rte_mempool_get(params.op_mpool, (void **)&op); TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n"); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - /* Update Op */ - op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]); - op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]); - op->src_seg->length = PACKET_LENGTH; - op->dst_seg->length = PACKET_LENGTH; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]); + op->src_dst_seg[0].length = PACKET_LENGTH; + op->src_dst_seg[1].length = PACKET_LENGTH; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -268,10 +264,6 @@ test_op_forward_mode(void) op->dma_dev_id = TEST_DMA_DEV_ID; op->vchan = TEST_DMA_VCHAN_ID; - response_info.event = dma_response_info.event; - rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info, - sizeof(struct rte_event)); - /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev[i], 0, sizeof(struct rte_event)); ev[i].event = 0; @@ -294,8 +286,6 @@ test_op_forward_mode(void) TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n"); - rte_free(op->src_seg); - rte_free(op->dst_seg); rte_mempool_put(op->op_mp, op); } @@ -400,7 +390,7 @@ configure_dmadev(void) rte_socket_id()); RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n"); - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n"); diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst index 3443b6a803..1fb9b0a07b 100644 --- a/doc/guides/prog_guide/event_dma_adapter.rst +++ b/doc/guides/prog_guide/event_dma_adapter.rst @@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu uint32_t 
cap; int ret; - /* Fill in event info and update event_ptr with rte_dma_op */ + /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev, 0, sizeof(ev)); . . diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index f6562b603e..8a3c0c1008 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -490,8 +490,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) hdr[1] = ((uint64_t)comp_ptr); hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); - src = &op->src_seg[0]; - dst = &op->dst_seg[0]; + src = &op->src_dst_seg[0]; + dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && @@ -566,12 +566,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; @@ -647,12 +647,12 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c index 24dff556db..e52ef46a1b 100644 --- a/lib/eventdev/rte_event_dma_adapter.c +++ b/lib/eventdev/rte_event_dma_adapter.c @@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, uint16_t vchan, uint16_t *nb_ops_flushed) { struct rte_event_dma_adapter_op *op; - struct dma_vchan_info *tq; uint16_t *head = &bufp->head; uint16_t *tail = &bufp->tail; + struct dma_vchan_info *tq; uint16_t n; uint16_t i; int ret; @@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, for (i = 0; i < n; i++) { op = bufp->op_buffer[*head]; if (op->nb_src == 1 && op->nb_dst == 1) - ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr, - op->src_seg->length, op->flags); + ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr, + op->src_dst_seg[1].addr, op->src_dst_seg[0].length, + op->flags); else - ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg, - op->nb_src, op->nb_dst, op->flags); + ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0], + &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst, + op->flags); if (ret < 0) break; @@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns if (dma_op == NULL) continue; - /* Expected to have response info appended to dma_op. 
*/ - + dma_op->impl_opaque[0] = ev[i].event; dma_dev_id = dma_op->dma_dev_id; vchan = dma_op->vchan; vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan]; @@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a uint8_t event_port_id = adapter->event_port_id; uint8_t event_dev_id = adapter->eventdev_id; struct rte_event events[DMA_BATCH_SIZE]; - struct rte_event *response_info; uint16_t nb_enqueued, nb_ev; uint8_t retry; uint8_t i; @@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a for (i = 0; i < num; i++) { struct rte_event *ev = &events[nb_ev++]; - /* Expected to have response info appended to dma_op. */ - response_info = (struct rte_event *)((uint8_t *)ops[i] + - sizeof(struct rte_event_dma_adapter_op)); - if (unlikely(response_info == NULL)) { - if (ops[i] != NULL && ops[i]->op_mp != NULL) - rte_mempool_put(ops[i]->op_mp, ops[i]); - continue; - } - - rte_memcpy(ev, response_info, sizeof(struct rte_event)); + ev->event = ops[i]->impl_opaque[0]; ev->event_ptr = ops[i]; ev->event_type = RTE_EVENT_TYPE_DMADEV; if (adapter->implicit_release_disabled) diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index e924ab673d..048ddba3f3 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -157,24 +157,46 @@ extern "C" { * instance. */ struct rte_event_dma_adapter_op { - struct rte_dma_sge *src_seg; - /**< Source segments. */ - struct rte_dma_sge *dst_seg; - /**< Destination segments. */ - uint16_t nb_src; - /**< Number of source segments. */ - uint16_t nb_dst; - /**< Number of destination segments. */ uint64_t flags; /**< Flags related to the operation. * @see RTE_DMA_OP_FLAG_* */ - int16_t dma_dev_id; - /**< DMA device ID to be used */ - uint16_t vchan; - /**< DMA vchan ID to be used */ struct rte_mempool *op_mp; /**< Mempool from which op is allocated. */ + enum rte_dma_status_code status; + /**< Status code for this operation. */ + uint32_t rsvd; + /**< Reserved for future use. */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An dma device implementation use this field to hold + * implementation specific values to share between dequeue and enqueue + * operations. + * The application should not modify this field. + */ + uint64_t user_meta; + /**< Memory to store user specific metadata. + * The dma device implementation should not modify this area. + */ + uint64_t event_meta; + /**< Event metadata that defines event attributes when used in OP_NEW mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW + * @see struct rte_event::event + */ + int16_t dma_dev_id; + /**< DMA device ID to be used with OP_FORWARD mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t vchan; + /**< DMA vchan ID to be used with OP_FORWARD mode + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t nb_src; + /**< Number of source segments. */ + uint16_t nb_dst; + /**< Number of destination segments. */ + struct rte_dma_sge src_dst_seg[0]; + /**< Source and destination segments. */ }; /** -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
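The reorganized op above moves the scatter-gather entries into a flexible src_dst_seg[] array at the tail of struct rte_event_dma_adapter_op, so the mempool element itself carries them and the per-op rte_malloc() calls go away. A minimal sketch of how an application could size such a pool and populate one op under the new layout, mirroring the test changes in this patch (NB_SGE, OP_POOL_SZ and the pool name are illustrative, not part of the patch):

#include <rte_dmadev.h>
#include <rte_event_dma_adapter.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

#define NB_SGE     2    /* one source + one destination segment */
#define OP_POOL_SZ 4096 /* illustrative pool size */

/* Size each mempool element to hold the op header plus its trailing segments. */
static struct rte_mempool *
create_dma_op_pool(void)
{
	unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
				(sizeof(struct rte_dma_sge) * NB_SGE);

	return rte_mempool_create("dma_op_pool", OP_POOL_SZ, elt_size, 256, 0,
				  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
}

/* Fill one op: source segments come first in src_dst_seg[], destination
 * segments follow starting at index nb_src.
 */
static void
fill_op(struct rte_event_dma_adapter_op *op, rte_iova_t src, rte_iova_t dst,
	uint32_t len, int16_t dma_dev_id, uint16_t vchan)
{
	op->src_dst_seg[0].addr = src;
	op->src_dst_seg[0].length = len;
	op->src_dst_seg[1].addr = dst;
	op->src_dst_seg[1].length = len;
	op->nb_src = 1;
	op->nb_dst = 1;
	op->flags = RTE_DMA_OP_FLAG_SUBMIT;
	op->dma_dev_id = dma_dev_id;
	op->vchan = vchan;
}

The same sizing rule is what the test-eventdev and unit-test hunks above switch to: sizeof(struct rte_event_dma_adapter_op) + 2 * sizeof(struct rte_dma_sge).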
* [PATCH v4 2/2] dma/cnxk: remove completion pool 2024-05-30 12:44 ` [PATCH v4 " pbhagavatula @ 2024-05-30 12:44 ` pbhagavatula 2024-06-07 10:20 ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob 2024-06-07 10:36 ` [PATCH v5 " pbhagavatula 2 siblings, 0 replies; 17+ messages in thread From: pbhagavatula @ 2024-05-30 12:44 UTC (permalink / raw) To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use DMA ops to store metadata, remove use of completion pool. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Vamsi Attunuru <vattunuru@marvell.com> --- drivers/dma/cnxk/cnxk_dmadev.c | 53 ++++-------- drivers/dma/cnxk/cnxk_dmadev.h | 24 +----- drivers/dma/cnxk/cnxk_dmadev_fp.c | 101 ++++++----------------- drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 ++--------- 4 files changed, 54 insertions(+), 171 deletions(-) diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..dfd7222713 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,6 +2,8 @@ * Copyright (C) 2021 Marvell International Ltd. */ +#include <rte_event_dma_adapter.h> + #include <cnxk_dmadev.h> static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) { struct cnxk_dpi_conf *dpi_conf; uint16_t num_vchans; - uint16_t max_desc; - int i, j; + int i; if (vchan == RTE_DMA_ALL_VCHAN) { num_vchans = dpivf->num_vchans; @@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) for (; i < num_vchans; i++) { dpi_conf = &dpivf->conf[i]; - max_desc = dpi_conf->c_desc.max_cnt + 1; - if (dpi_conf->c_desc.compl_ptr) { - for (j = 0; j < max_desc; j++) - rte_free(dpi_conf->c_desc.compl_ptr[j]); - } - rte_free(dpi_conf->c_desc.compl_ptr); dpi_conf->c_desc.compl_ptr = NULL; } @@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, if (max_desc > CNXK_DPI_MAX_DESC) max_desc = CNXK_DPI_MAX_DESC; - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); + size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET); dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); if (dpi_conf->c_desc.compl_ptr == NULL) { @@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, return -ENOMEM; } - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } + for (i = 0; i < max_desc; i++) + dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; dpi_conf->c_desc.max_cnt = (max_desc - 1); @@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->pnum_words = 0; dpi_conf->pending = 0; dpi_conf->desc_idx = 0; - for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) { - if (dpi_conf->c_desc.compl_ptr[j]) - dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA; - } + for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) + dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; nb_desc += dpi_conf->c_desc.max_cnt + 1; cnxk_stats_reset(dev, i); dpi_conf->completed_offset = 0; @@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, struct cnxk_dpi_vf_s *dpivf = dev_private; struct 
cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t status; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - - if (comp_ptr->cdata) { - if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA) + status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; + if (status) { + if (status == CNXK_DPI_REQ_CDATA) break; *has_error = 1; dpi_conf->stats.errors++; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = + CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); break; } - - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - status[cnt] = comp_ptr->cdata; + status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; if (status[cnt]) { if (status[cnt] == CNXK_DPI_REQ_CDATA) break; dpi_conf->stats.errors++; } - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de rdpi = &dpivf->rdpi; rdpi->pci_dev = pci_dev; - rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs)); + rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque)); if (rc < 0) goto err_out_free; diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..a80db333a0 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -37,17 +37,12 @@ #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ CNXK_DPI_MAX_POINTER) #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1) - +#define CNXK_DPI_COMPL_OFFSET ROC_CACHE_LINE_SZ /* Set Completion data to 0xFF when request submitted, * upon successful request completion engine reset to completion status */ #define CNXK_DPI_REQ_CDATA 0xFF -/* Set Completion data to 0xDEADBEEF when request submitted for SSO. - * This helps differentiate if the dequeue is called after cnxk enueue. - */ -#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF - union cnxk_dpi_instr_cmd { uint64_t u; struct cn9k_dpi_instr_cmd { @@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd { } cn10k; }; -struct cnxk_dpi_compl_s { - uint64_t cdata; - void *op; - uint16_t dev_id; - uint16_t vchan; - uint32_t wqecs; -}; - struct cnxk_dpi_cdesc_data_s { - struct cnxk_dpi_compl_s **compl_ptr; uint16_t max_cnt; uint16_t head; uint16_t tail; -}; - -struct cnxk_dma_adapter_info { - bool enabled; /* Set if vchan queue is added to dma adapter. */ - struct rte_mempool *req_mp; /* DMA inflight request mempool. 
*/ + uint8_t *compl_ptr; }; struct cnxk_dpi_conf { @@ -119,7 +101,7 @@ struct cnxk_dpi_conf { uint16_t desc_idx; struct rte_dma_stats stats; uint64_t completed_offset; - struct cnxk_dma_adapter_info adapter_info; + bool adapter_enabled; }; struct cnxk_dpi_vf_s { diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index 8a3c0c1008..26591235c6 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = (1UL << 54) | (1UL << 48); @@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37); @@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; @@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge { struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; @@ -454,10 +454,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *src, *dst; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn10k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t 
mcs_lock_me; @@ -469,34 +467,23 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; - hdr[1] = ((uint64_t)comp_ptr); - hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); + hdr[1] = (uint64_t)op; + hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); src = &op->src_dst_seg[0]; dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && - (rsp_info->sched_type & DPI_HDR_TT_MASK) == - RTE_SCHED_TYPE_ORDERED)) + (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -528,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cn9k_sso_hws_dual *work; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; @@ -548,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -609,10 +587,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn9k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -624,21 +600,11 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst 
= op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -656,9 +622,9 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; - hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event); + hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event); - if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) + if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -689,38 +655,23 @@ uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1) { struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - rte_mcslock_t mcs_lock_me; - RTE_ATOMIC(uint8_t) *wqecs; - comp_ptr = (struct cnxk_dpi_compl_s *)get_work1; + op = (struct rte_event_dma_adapter_op *)get_work1; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpi_conf = &dpivf->conf[op->vchan]; - /* Dequeue can be called without calling cnx_enqueue in case of - * dma_adapter. When its called from adapter, dma op will not be - * embedded in completion pointer. In those cases return op. - */ - if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA) - return (uintptr_t)comp_ptr; - - dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private; - dpi_conf = &dpivf->conf[comp_ptr->vchan]; - - rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); - wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs; - if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0) - dpi_conf->stats.errors++; + if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0], + rte_memory_order_relaxed) != 0) + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1, + rte_memory_order_relaxed); /* Take into account errors also. This is similar to * cnxk_dmadev_completed_status(). 
*/ - dpi_conf->stats.completed++; - rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me); - - op = (struct rte_event_dma_adapter_op *)comp_ptr->op; - - rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr); + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1, + rte_memory_order_relaxed); return (uintptr_t)op; } diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index a2a59b16c9..98db11ad61 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev, return 0; } -static int -dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan, - uint16_t vchan_id) -{ - char name[RTE_MEMPOOL_NAMESIZE]; - uint32_t cache_size, nb_req; - unsigned int req_size; - - snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id); - req_size = sizeof(struct cnxk_dpi_compl_s); - - nb_req = vchan->c_desc.max_cnt; - cache_size = 16; - nb_req += (cache_size * rte_lcore_count()); - - vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0, - NULL, NULL, NULL, NULL, rte_socket_id(), 0); - if (vchan->adapter_info.req_mp == NULL) - return -ENOMEM; - - vchan->adapter_info.enabled = true; - - return 0; -} - int cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, const int16_t dma_dev_id, uint16_t vchan_id) @@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, uint32_t adptr_xae_cnt = 0; struct cnxk_dpi_vf_s *dpivf; struct cnxk_dpi_conf *vchan; - int ret; dpivf = rte_dma_fp_objs[dma_dev_id].dev_private; if ((int16_t)vchan_id == -1) { @@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) { - cnxk_dma_adapter_vchan_del(dma_dev_id, -1); - return ret; - } - adptr_xae_cnt += vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt += vchan->c_desc.max_cnt; } } else { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) - return ret; - adptr_xae_cnt = vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt = vchan->c_desc.max_cnt; } /* Update dma adapter XAE count */ @@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, static int dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan) { - rte_mempool_free(vchan->adapter_info.req_mp); - vchan->adapter_info.enabled = false; + vchan->adapter_enabled = false; return 0; } @@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id) for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } } else { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
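With the completion pool removed, each vchan's completion ring in the driver above becomes a flat byte array, one cache line (CNXK_DPI_COMPL_OFFSET, taken from ROC_CACHE_LINE_SZ) per descriptor, and the DPI engine overwrites the status byte in place when a transfer finishes. A simplified sketch of that indexing scheme, not the driver code itself (the 128-byte stride is an assumption based on ROC_CACHE_LINE_SZ):

#include <stdint.h>

#define COMPL_STRIDE 128  /* assumed cache-line stride; one status slot per line */
#define REQ_CDATA    0xFF /* "request in flight" marker */

struct compl_ring {
	uint16_t max_cnt;   /* max_desc - 1, used as ring mask */
	uint16_t head;
	uint16_t tail;
	uint8_t *compl_ptr; /* max_desc * COMPL_STRIDE bytes */
};

/* Submit side: mark the tail slot as pending and hand its address to the engine. */
static uint8_t *
ring_reserve(struct compl_ring *r)
{
	uint8_t *slot = &r->compl_ptr[r->tail * COMPL_STRIDE];

	*slot = REQ_CDATA;
	r->tail = (r->tail + 1) & r->max_cnt;
	return slot;
}

/* Poll side: any value other than REQ_CDATA means the engine wrote a
 * completion code (0 on success, non-zero on error); reset the slot and
 * advance the head.
 */
static int
ring_poll(struct compl_ring *r, uint8_t *status)
{
	uint8_t *slot = &r->compl_ptr[r->head * COMPL_STRIDE];

	if (*slot == REQ_CDATA)
		return 0; /* still pending */
	*status = *slot;
	*slot = REQ_CDATA;
	r->head = (r->head + 1) & r->max_cnt;
	return 1;
}

This keeps one status byte per in-flight descriptor without any per-request allocation, which is what lets the adapter enqueue path drop the rte_mempool_get()/rte_mempool_put() pair shown in the hunks above.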
* Re: [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops 2024-05-30 12:44 ` [PATCH v4 " pbhagavatula 2024-05-30 12:44 ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula @ 2024-06-07 10:20 ` Jerin Jacob 2024-06-07 10:36 ` [PATCH v5 " pbhagavatula 2 siblings, 0 replies; 17+ messages in thread From: Jerin Jacob @ 2024-06-07 10:20 UTC (permalink / raw) To: pbhagavatula; +Cc: jerinj, Amit Prakash Shukla, Vamsi Attunuru, dev On Thu, May 30, 2024 at 6:14 PM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Re-organize event DMA ops structure to allow holding > source and destination pointers without the need for > additional memory, the mempool allocating memory for > rte_event_dma_adapter_ops can size the structure to > accommodate all the needed source and destination > pointers. > > Add multiple words for holding user metadata, adapter > implementation specific metadata and event metadata. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> > --- > v4 Changes: > - Reduce unreleated driver changes and move to 2/2. > v3 Changes: > - Fix stdatomic compilation. > v2 Changes: > - Fix 32bit compilation > > app/test-eventdev/test_perf_common.c | 26 ++++-------- > app/test/test_event_dma_adapter.c | 20 +++------ > doc/guides/prog_guide/event_dma_adapter.rst | 2 +- > drivers/dma/cnxk/cnxk_dmadev_fp.c | 20 ++++----- > lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- > lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ > 6 files changed, 66 insertions(+), 75 deletions(-) > * instance. > */ > struct rte_event_dma_adapter_op { > - struct rte_dma_sge *src_seg; Even though it is experimental, changes in a public structure need to be updated in the release notes. Please send the next version. ^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops 2024-05-30 12:44 ` [PATCH v4 " pbhagavatula 2024-05-30 12:44 ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula 2024-06-07 10:20 ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob @ 2024-06-07 10:36 ` pbhagavatula 2024-06-07 10:36 ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula 2024-06-08 6:16 ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob 2 siblings, 2 replies; 17+ messages in thread From: pbhagavatula @ 2024-06-07 10:36 UTC (permalink / raw) To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Re-organize event DMA ops structure to allow holding source and destination pointers without the need for additional memory, the mempool allocating memory for rte_event_dma_adapter_ops can size the structure to accommodate all the needed source and destination pointers. Add multiple words for holding user metadata, adapter implementation specific metadata and event metadata. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com> --- v5 Changes: - Update release notes with Experimental API changes. v4 Changes: - Reduce unreleated driver changes and move to 2/2. v3 Changes: - Fix stdatomic compilation. v2 Changes: - Fix 32bit compilation app/test-eventdev/test_perf_common.c | 26 ++++-------- app/test/test_event_dma_adapter.c | 20 +++------ doc/guides/prog_guide/event_dma_adapter.rst | 2 +- doc/guides/rel_notes/release_24_07.rst | 3 ++ drivers/dma/cnxk/cnxk_dmadev_fp.c | 20 ++++----- lib/eventdev/rte_event_dma_adapter.c | 27 ++++-------- lib/eventdev/rte_event_dma_adapter.h | 46 +++++++++++++++------ 7 files changed, 69 insertions(+), 75 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index 93e6132de8..db0f9c1f3b 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, prod = 0; for (; port < perf_nb_event_ports(opt); port++) { struct prod_data *p = &t->prod[port]; - struct rte_event *response_info; uint32_t flow_id; p->dev_id = opt->dev_id; @@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { rte_mempool_get(t->da_op_pool, (void **)&op); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - - op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); - op->src_seg->length = 1024; - op->dst_seg->length = 1024; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool)); + op->src_dst_seg[0].length = 1024; + op->src_dst_seg[1].length = 1024; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, op->dma_dev_id = dma_dev_id; op->vchan = vchan_id; - response_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - response_info->queue_id = p->queue_id; - response_info->sched_type = RTE_SCHED_TYPE_ATOMIC; - response_info->flow_id = flow_id; - p->da.dma_op[flow_id] = op; } @@ -2036,7 +2026,7 @@ 
perf_dmadev_setup(struct evt_test *test, struct evt_options *opt) return -ENODEV; } - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); if (t->da_op_pool == NULL) { @@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt) for (flow_id = 0; flow_id < t->nb_flows; flow_id++) { op = p->da.dma_op[flow_id]; - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr); - rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr); - rte_free(op->src_seg); - rte_free(op->dst_seg); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr); + rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr); rte_mempool_put(op->op_mp, op); } diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c index 35b417b69f..d9dff4ff7d 100644 --- a/app/test/test_event_dma_adapter.c +++ b/app/test/test_event_dma_adapter.c @@ -235,7 +235,6 @@ test_op_forward_mode(void) struct rte_mbuf *dst_mbuf[TEST_MAX_OP]; struct rte_event_dma_adapter_op *op; struct rte_event ev[TEST_MAX_OP]; - struct rte_event response_info; int ret, i; ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP); @@ -253,14 +252,11 @@ test_op_forward_mode(void) rte_mempool_get(params.op_mpool, (void **)&op); TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n"); - op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0); - /* Update Op */ - op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]); - op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]); - op->src_seg->length = PACKET_LENGTH; - op->dst_seg->length = PACKET_LENGTH; + op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]); + op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]); + op->src_dst_seg[0].length = PACKET_LENGTH; + op->src_dst_seg[1].length = PACKET_LENGTH; op->nb_src = 1; op->nb_dst = 1; op->flags = RTE_DMA_OP_FLAG_SUBMIT; @@ -268,10 +264,6 @@ test_op_forward_mode(void) op->dma_dev_id = TEST_DMA_DEV_ID; op->vchan = TEST_DMA_VCHAN_ID; - response_info.event = dma_response_info.event; - rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info, - sizeof(struct rte_event)); - /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev[i], 0, sizeof(struct rte_event)); ev[i].event = 0; @@ -294,8 +286,6 @@ test_op_forward_mode(void) TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n"); - rte_free(op->src_seg); - rte_free(op->dst_seg); rte_mempool_put(op->op_mp, op); } @@ -400,7 +390,7 @@ configure_dmadev(void) rte_socket_id()); RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n"); - elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event); + elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2); params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0); RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n"); diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst index 3443b6a803..1fb9b0a07b 100644 --- a/doc/guides/prog_guide/event_dma_adapter.rst +++ 
b/doc/guides/prog_guide/event_dma_adapter.rst @@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu uint32_t cap; int ret; - /* Fill in event info and update event_ptr with rte_dma_op */ + /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */ memset(&ev, 0, sizeof(ev)); . . diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst index a69f24cf99..7800cb4c31 100644 --- a/doc/guides/rel_notes/release_24_07.rst +++ b/doc/guides/rel_notes/release_24_07.rst @@ -84,6 +84,9 @@ API Changes Also, make sure to start the actual text at the margin. ======================================================= +* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op`` + to optimize the memory layout and improve performance. + ABI Changes ----------- diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index f6562b603e..8a3c0c1008 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -490,8 +490,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) hdr[1] = ((uint64_t)comp_ptr); hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); - src = &op->src_seg[0]; - dst = &op->dst_seg[0]; + src = &op->src_dst_seg[0]; + dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && @@ -566,12 +566,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event * For all other cases, src pointers are first pointers. */ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; @@ -647,12 +647,12 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) * For all other cases, src pointers are first pointers. 
*/ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) { - fptr = &op->dst_seg[0]; - lptr = &op->src_seg[0]; + fptr = &op->src_dst_seg[nb_src]; + lptr = &op->src_dst_seg[0]; RTE_SWAP(nb_src, nb_dst); } else { - fptr = &op->src_seg[0]; - lptr = &op->dst_seg[0]; + fptr = &op->src_dst_seg[0]; + lptr = &op->src_dst_seg[nb_src]; } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c index 24dff556db..e52ef46a1b 100644 --- a/lib/eventdev/rte_event_dma_adapter.c +++ b/lib/eventdev/rte_event_dma_adapter.c @@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, uint16_t vchan, uint16_t *nb_ops_flushed) { struct rte_event_dma_adapter_op *op; - struct dma_vchan_info *tq; uint16_t *head = &bufp->head; uint16_t *tail = &bufp->tail; + struct dma_vchan_info *tq; uint16_t n; uint16_t i; int ret; @@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter, for (i = 0; i < n; i++) { op = bufp->op_buffer[*head]; if (op->nb_src == 1 && op->nb_dst == 1) - ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr, - op->src_seg->length, op->flags); + ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr, + op->src_dst_seg[1].addr, op->src_dst_seg[0].length, + op->flags); else - ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg, - op->nb_src, op->nb_dst, op->flags); + ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0], + &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst, + op->flags); if (ret < 0) break; @@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns if (dma_op == NULL) continue; - /* Expected to have response info appended to dma_op. */ - + dma_op->impl_opaque[0] = ev[i].event; dma_dev_id = dma_op->dma_dev_id; vchan = dma_op->vchan; vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan]; @@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a uint8_t event_port_id = adapter->event_port_id; uint8_t event_dev_id = adapter->eventdev_id; struct rte_event events[DMA_BATCH_SIZE]; - struct rte_event *response_info; uint16_t nb_enqueued, nb_ev; uint8_t retry; uint8_t i; @@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a for (i = 0; i < num; i++) { struct rte_event *ev = &events[nb_ev++]; - /* Expected to have response info appended to dma_op. */ - response_info = (struct rte_event *)((uint8_t *)ops[i] + - sizeof(struct rte_event_dma_adapter_op)); - if (unlikely(response_info == NULL)) { - if (ops[i] != NULL && ops[i]->op_mp != NULL) - rte_mempool_put(ops[i]->op_mp, ops[i]); - continue; - } - - rte_memcpy(ev, response_info, sizeof(struct rte_event)); + ev->event = ops[i]->impl_opaque[0]; ev->event_ptr = ops[i]; ev->event_type = RTE_EVENT_TYPE_DMADEV; if (adapter->implicit_release_disabled) diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index e924ab673d..048ddba3f3 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -157,24 +157,46 @@ extern "C" { * instance. */ struct rte_event_dma_adapter_op { - struct rte_dma_sge *src_seg; - /**< Source segments. */ - struct rte_dma_sge *dst_seg; - /**< Destination segments. */ - uint16_t nb_src; - /**< Number of source segments. */ - uint16_t nb_dst; - /**< Number of destination segments. 
*/ uint64_t flags; /**< Flags related to the operation. * @see RTE_DMA_OP_FLAG_* */ - int16_t dma_dev_id; - /**< DMA device ID to be used */ - uint16_t vchan; - /**< DMA vchan ID to be used */ struct rte_mempool *op_mp; /**< Mempool from which op is allocated. */ + enum rte_dma_status_code status; + /**< Status code for this operation. */ + uint32_t rsvd; + /**< Reserved for future use. */ + uint64_t impl_opaque[2]; + /**< Implementation-specific opaque data. + * An dma device implementation use this field to hold + * implementation specific values to share between dequeue and enqueue + * operations. + * The application should not modify this field. + */ + uint64_t user_meta; + /**< Memory to store user specific metadata. + * The dma device implementation should not modify this area. + */ + uint64_t event_meta; + /**< Event metadata that defines event attributes when used in OP_NEW mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW + * @see struct rte_event::event + */ + int16_t dma_dev_id; + /**< DMA device ID to be used with OP_FORWARD mode. + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t vchan; + /**< DMA vchan ID to be used with OP_FORWARD mode + * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD + */ + uint16_t nb_src; + /**< Number of source segments. */ + uint16_t nb_dst; + /**< Number of destination segments. */ + struct rte_dma_sge src_dst_seg[0]; + /**< Source and destination segments. */ }; /** -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
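One behavioural change worth restating from the library hunks above: in forward mode the adapter no longer reads a struct rte_event appended after the op. It stores the original ev.event word in impl_opaque[0] at enqueue time and rebuilds the completion event from it. A hedged sketch of that round trip (the helper names are illustrative, and the final ev.op choice depends on the adapter's implicit-release setting):

#include <rte_event_dma_adapter.h>
#include <rte_eventdev.h>

/* Enqueue side (adapter-internal): remember the event attributes that the
 * completion event should carry (queue_id, sched_type, flow_id, ...).
 */
static void
save_response_event(struct rte_event_dma_adapter_op *op, const struct rte_event *ev)
{
	op->impl_opaque[0] = ev->event;
}

/* Completion side: rebuild the response event from the stored word. */
static void
make_response_event(struct rte_event_dma_adapter_op *op, struct rte_event *ev)
{
	ev->event = op->impl_opaque[0];
	ev->event_ptr = op;
	ev->event_type = RTE_EVENT_TYPE_DMADEV;
	ev->op = RTE_EVENT_OP_NEW; /* assumption: NEW or FORWARD per implicit-release config */
}

Applications therefore only fill the forwarded event's queue_id, sched_type and flow_id as usual; the extra rte_memcpy() of a response event placed after the op, removed from the unit test above, is no longer required.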
* [PATCH v5 2/2] dma/cnxk: remove completion pool 2024-06-07 10:36 ` [PATCH v5 " pbhagavatula @ 2024-06-07 10:36 ` pbhagavatula 2024-06-08 6:16 ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob 1 sibling, 0 replies; 17+ messages in thread From: pbhagavatula @ 2024-06-07 10:36 UTC (permalink / raw) To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use DMA ops to store metadata, remove use of completion pool. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Vamsi Attunuru <vattunuru@marvell.com> --- drivers/dma/cnxk/cnxk_dmadev.c | 53 ++++-------- drivers/dma/cnxk/cnxk_dmadev.h | 24 +----- drivers/dma/cnxk/cnxk_dmadev_fp.c | 101 ++++++----------------- drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 ++--------- 4 files changed, 54 insertions(+), 171 deletions(-) diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c index 4ab3cfbdf2..dfd7222713 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.c +++ b/drivers/dma/cnxk/cnxk_dmadev.c @@ -2,6 +2,8 @@ * Copyright (C) 2021 Marvell International Ltd. */ +#include <rte_event_dma_adapter.h> + #include <cnxk_dmadev.h> static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan); @@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) { struct cnxk_dpi_conf *dpi_conf; uint16_t num_vchans; - uint16_t max_desc; - int i, j; + int i; if (vchan == RTE_DMA_ALL_VCHAN) { num_vchans = dpivf->num_vchans; @@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan) for (; i < num_vchans; i++) { dpi_conf = &dpivf->conf[i]; - max_desc = dpi_conf->c_desc.max_cnt + 1; - if (dpi_conf->c_desc.compl_ptr) { - for (j = 0; j < max_desc; j++) - rte_free(dpi_conf->c_desc.compl_ptr[j]); - } - rte_free(dpi_conf->c_desc.compl_ptr); dpi_conf->c_desc.compl_ptr = NULL; } @@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, if (max_desc > CNXK_DPI_MAX_DESC) max_desc = CNXK_DPI_MAX_DESC; - size = (max_desc * sizeof(struct cnxk_dpi_compl_s *)); + size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET); dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0); if (dpi_conf->c_desc.compl_ptr == NULL) { @@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan, return -ENOMEM; } - for (i = 0; i < max_desc; i++) { - dpi_conf->c_desc.compl_ptr[i] = - rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0); - if (!dpi_conf->c_desc.compl_ptr[i]) { - plt_err("Failed to allocate for descriptor memory"); - return -ENOMEM; - } - - dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA; - } + for (i = 0; i < max_desc; i++) + dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; dpi_conf->c_desc.max_cnt = (max_desc - 1); @@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev) dpi_conf->pnum_words = 0; dpi_conf->pending = 0; dpi_conf->desc_idx = 0; - for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) { - if (dpi_conf->c_desc.compl_ptr[j]) - dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA; - } + for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) + dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; nb_desc += dpi_conf->c_desc.max_cnt + 1; cnxk_stats_reset(dev, i); dpi_conf->completed_offset = 0; @@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; 
struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t status; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - - if (comp_ptr->cdata) { - if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA) + status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; + if (status) { + if (status == CNXK_DPI_REQ_CDATA) break; *has_error = 1; dpi_conf->stats.errors++; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = + CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); break; } - - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc; - struct cnxk_dpi_compl_s *comp_ptr; int cnt; for (cnt = 0; cnt < nb_cpls; cnt++) { - comp_ptr = c_desc->compl_ptr[c_desc->head]; - status[cnt] = comp_ptr->cdata; + status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET]; if (status[cnt]) { if (status[cnt] == CNXK_DPI_REQ_CDATA) break; dpi_conf->stats.errors++; } - comp_ptr->cdata = CNXK_DPI_REQ_CDATA; + c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA; CNXK_DPI_STRM_INC(*c_desc, head); } @@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de rdpi = &dpivf->rdpi; rdpi->pci_dev = pci_dev; - rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs)); + rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque)); if (rc < 0) goto err_out_free; diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h index 610a360ba2..a80db333a0 100644 --- a/drivers/dma/cnxk/cnxk_dmadev.h +++ b/drivers/dma/cnxk/cnxk_dmadev.h @@ -37,17 +37,12 @@ #define CNXK_DPI_MAX_CMD_SZ CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, \ CNXK_DPI_MAX_POINTER) #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1) - +#define CNXK_DPI_COMPL_OFFSET ROC_CACHE_LINE_SZ /* Set Completion data to 0xFF when request submitted, * upon successful request completion engine reset to completion status */ #define CNXK_DPI_REQ_CDATA 0xFF -/* Set Completion data to 0xDEADBEEF when request submitted for SSO. - * This helps differentiate if the dequeue is called after cnxk enueue. - */ -#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF - union cnxk_dpi_instr_cmd { uint64_t u; struct cn9k_dpi_instr_cmd { @@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd { } cn10k; }; -struct cnxk_dpi_compl_s { - uint64_t cdata; - void *op; - uint16_t dev_id; - uint16_t vchan; - uint32_t wqecs; -}; - struct cnxk_dpi_cdesc_data_s { - struct cnxk_dpi_compl_s **compl_ptr; uint16_t max_cnt; uint16_t head; uint16_t tail; -}; - -struct cnxk_dma_adapter_info { - bool enabled; /* Set if vchan queue is added to dma adapter. */ - struct rte_mempool *req_mp; /* DMA inflight request mempool. 
*/ + uint8_t *compl_ptr; }; struct cnxk_dpi_conf { @@ -119,7 +101,7 @@ struct cnxk_dpi_conf { uint16_t desc_idx; struct rte_dma_stats stats; uint64_t completed_offset; - struct cnxk_dma_adapter_info adapter_info; + bool adapter_enabled; }; struct cnxk_dpi_vf_s { diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c index 8a3c0c1008..26591235c6 100644 --- a/drivers/dma/cnxk/cnxk_dmadev_fp.c +++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c @@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = (1UL << 54) | (1UL << 48); @@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; const struct rte_dma_sge *fptr, *lptr; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37); @@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; int rc; if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) == dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U; @@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge { struct cnxk_dpi_vf_s *dpivf = dev_private; struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan]; - struct cnxk_dpi_compl_s *comp_ptr; + uint8_t *comp_ptr; uint64_t hdr[4]; int rc; @@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge dpi_conf->c_desc.head)) return -ENOSPC; - comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail]; + comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET]; CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail); hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src; @@ -454,10 +454,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *src, *dst; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn10k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t 
mcs_lock_me; @@ -469,34 +467,23 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54); hdr[0] |= (nb_dst << 6) | nb_src; - hdr[1] = ((uint64_t)comp_ptr); - hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event); + hdr[1] = (uint64_t)op; + hdr[2] = cnxk_dma_adapter_format_event(ev[count].event); src = &op->src_dst_seg[0]; dst = &op->src_dst_seg[op->nb_src]; if (CNXK_TAG_IS_HEAD(work->gw_rdata) || ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) && - (rsp_info->sched_type & DPI_HDR_TT_MASK) == - RTE_SCHED_TYPE_ORDERED)) + (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -528,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cn9k_sso_hws_dual *work; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; @@ -548,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -609,10 +587,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) { const struct rte_dma_sge *fptr, *lptr; struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - struct rte_event *rsp_info; struct cn9k_sso_hws *work; uint16_t nb_src, nb_dst; rte_mcslock_t mcs_lock_me; @@ -624,21 +600,11 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) for (count = 0; count < nb_events; count++) { op = ev[count].event_ptr; - rsp_info = (struct rte_event *)((uint8_t *)op + - sizeof(struct rte_event_dma_adapter_op)); - dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; dpi_conf = &dpivf->conf[op->vchan]; - if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr))) - return count; - - comp_ptr->op = op; - comp_ptr->dev_id = op->dma_dev_id; - comp_ptr->vchan = op->vchan; - comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA; - hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36); - hdr[2] = (uint64_t)comp_ptr; + hdr[2] = (uint64_t)op; nb_src = op->nb_src & CNXK_DPI_MAX_POINTER; nb_dst 
= op->nb_dst & CNXK_DPI_MAX_POINTER; @@ -656,9 +622,9 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events) } hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48; - hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event); + hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event); - if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) + if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED) roc_sso_hws_head_wait(work->base); rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); @@ -689,38 +655,23 @@ uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1) { struct rte_event_dma_adapter_op *op; - struct cnxk_dpi_compl_s *comp_ptr; struct cnxk_dpi_conf *dpi_conf; struct cnxk_dpi_vf_s *dpivf; - rte_mcslock_t mcs_lock_me; - RTE_ATOMIC(uint8_t) *wqecs; - comp_ptr = (struct cnxk_dpi_compl_s *)get_work1; + op = (struct rte_event_dma_adapter_op *)get_work1; + dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private; + dpi_conf = &dpivf->conf[op->vchan]; - /* Dequeue can be called without calling cnx_enqueue in case of - * dma_adapter. When its called from adapter, dma op will not be - * embedded in completion pointer. In those cases return op. - */ - if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA) - return (uintptr_t)comp_ptr; - - dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private; - dpi_conf = &dpivf->conf[comp_ptr->vchan]; - - rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me); - wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs; - if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0) - dpi_conf->stats.errors++; + if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0], + rte_memory_order_relaxed) != 0) + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1, + rte_memory_order_relaxed); /* Take into account errors also. This is similar to * cnxk_dmadev_completed_status(). 
*/ - dpi_conf->stats.completed++; - rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me); - - op = (struct rte_event_dma_adapter_op *)comp_ptr->op; - - rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr); + rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1, + rte_memory_order_relaxed); return (uintptr_t)op; } diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index a2a59b16c9..98db11ad61 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev, return 0; } -static int -dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan, - uint16_t vchan_id) -{ - char name[RTE_MEMPOOL_NAMESIZE]; - uint32_t cache_size, nb_req; - unsigned int req_size; - - snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id); - req_size = sizeof(struct cnxk_dpi_compl_s); - - nb_req = vchan->c_desc.max_cnt; - cache_size = 16; - nb_req += (cache_size * rte_lcore_count()); - - vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0, - NULL, NULL, NULL, NULL, rte_socket_id(), 0); - if (vchan->adapter_info.req_mp == NULL) - return -ENOMEM; - - vchan->adapter_info.enabled = true; - - return 0; -} - int cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, const int16_t dma_dev_id, uint16_t vchan_id) @@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, uint32_t adptr_xae_cnt = 0; struct cnxk_dpi_vf_s *dpivf; struct cnxk_dpi_conf *vchan; - int ret; dpivf = rte_dma_fp_objs[dma_dev_id].dev_private; if ((int16_t)vchan_id == -1) { @@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) { - cnxk_dma_adapter_vchan_del(dma_dev_id, -1); - return ret; - } - adptr_xae_cnt += vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt += vchan->c_desc.max_cnt; } } else { vchan = &dpivf->conf[vchan_id]; - ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id); - if (ret) - return ret; - adptr_xae_cnt = vchan->adapter_info.req_mp->size; + vchan->adapter_enabled = true; + adptr_xae_cnt = vchan->c_desc.max_cnt; } /* Update dma adapter XAE count */ @@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev, static int dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan) { - rte_mempool_free(vchan->adapter_info.req_mp); - vchan->adapter_info.enabled = false; + vchan->adapter_enabled = false; return 0; } @@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id) for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } } else { vchan = &dpivf->conf[vchan_id]; - if (vchan->adapter_info.enabled) + if (vchan->adapter_enabled) dma_adapter_vchan_free(vchan); } -- 2.25.1 ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops
  2024-06-07 10:36 ` [PATCH v5 " pbhagavatula
  2024-06-07 10:36   ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-06-08  6:16   ` Jerin Jacob
  1 sibling, 0 replies; 17+ messages in thread
From: Jerin Jacob @ 2024-06-08  6:16 UTC (permalink / raw)
  To: pbhagavatula; +Cc: jerinj, Amit Prakash Shukla, Vamsi Attunuru, dev

On Fri, Jun 7, 2024 at 11:53 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Re-organize event DMA ops structure to allow holding
> source and destination pointers without the need for
> additional memory, the mempool allocating memory for
> rte_event_dma_adapter_ops can size the structure to
> accommodate all the needed source and destination
> pointers.
>
> Add multiple words for holding user metadata, adapter
> implementation specific metadata and event metadata.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> ---
> v5 Changes:
> - Update release notes with Experimental API changes.
> v4 Changes:
> - Reduce unreleated driver changes and move to 2/2.
> v3 Changes:
> - Fix stdatomic compilation.
> v2 Changes:
> - Fix 32bit compilation
>
> .
> diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
> index a69f24cf99..7800cb4c31 100644
> --- a/doc/guides/rel_notes/release_24_07.rst
> +++ b/doc/guides/rel_notes/release_24_07.rst
> @@ -84,6 +84,9 @@ API Changes

It is not an API change. Applied the following diff and applied the series to
dpdk-next-eventdev/for-main. Thanks

[for-main][dpdk-next-eventdev] $ git diff
diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index 09e58dddf2..14bd5d37b1 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -91,9 +91,6 @@ API Changes
    Also, make sure to start the actual text at the margin.
    =======================================================
 
-* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op``
-  to optimize the memory layout and improve performance.
-
 
 ABI Changes
 -----------
@@ -112,6 +109,9 @@ ABI Changes
 
 * No ABI change that would break compatibility with 23.11.
 
+* eventdev/dma: Reorganize the experimental fastpath structure ``rte_event_dma_adapter_op``
+  to optimize the memory layout and improve performance.
+

> Also, make sure to start the actual text at the margin.
> =======================================================
>
> +* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op``
> +  to optimize the memory layout and improve performance.
> +
>
> ABI Changes

^ permalink raw reply	[flat|nested] 17+ messages in thread
* RE: [PATCH 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
@ 2024-05-16  7:36 ` Amit Prakash Shukla
  2 siblings, 0 replies; 17+ messages in thread
From: Amit Prakash Shukla @ 2024-05-16  7:36 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru
  Cc: dev, Pavan Nikhilesh Bhagavatula

> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Saturday, April 6, 2024 3:43 PM
> To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla
> <amitprakashs@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [PATCH 1/2] eventdev/dma: reorganize event DMA ops
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Re-organize event DMA ops structure to allow holding source and destination
> pointers without the need for additional memory, the mempool allocating
> memory for rte_event_dma_adapter_ops can size the structure to
> accommodate all the needed source and destination pointers.
>
> Add multiple words for holding user metadata, adapter implementation
> specific metadata and event metadata.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread
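For applications adapting to the reorganized layout described in the commit message quoted above, the practical change is that the source and destination rte_dma_sge entries live inline at the tail of struct rte_event_dma_adapter_op, so the op mempool element must be sized for them up front. Below is a rough sketch of that sizing and of filling one op, assuming a single source and a single destination segment per op; the pool name, sizes and the helpers themselves are placeholders for illustration, not part of the API.

#include <rte_dmadev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>
#include <rte_event_dma_adapter.h>

#define OP_NB_SEGS 2    /* one source + one destination entry */
#define OP_POOL_SZ 4096

static struct rte_mempool *
dma_op_pool_create(void)
{
	/* Element = fixed op header + inline scatter-gather array. */
	unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
				sizeof(struct rte_dma_sge) * OP_NB_SEGS;

	return rte_mempool_create("dma_op_pool", OP_POOL_SZ, elt_size, 256, 0,
				  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
}

static void
dma_op_fill(struct rte_event_dma_adapter_op *op, rte_iova_t src, rte_iova_t dst,
	    uint32_t len, int16_t dma_dev_id, uint16_t vchan)
{
	op->src_dst_seg[0].addr = src;     /* sources first ... */
	op->src_dst_seg[0].length = len;
	op->src_dst_seg[1].addr = dst;     /* ... destinations after them */
	op->src_dst_seg[1].length = len;
	op->nb_src = 1;
	op->nb_dst = 1;
	op->flags = RTE_DMA_OP_FLAG_SUBMIT;
	op->dma_dev_id = dma_dev_id;
	op->vchan = vchan;
}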
end of thread, other threads:[~2024-06-08  6:17 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-04-16  8:56   ` Vamsi Krishna Attunuru
2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-17  5:58   ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-17  8:26     ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-05-16  7:39     ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla
2024-05-30 12:23     ` Jerin Jacob
2024-05-30 17:35       ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
2024-05-30 12:44       ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-06-07 10:20       ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
2024-06-07 10:36         ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-06-08  6:16         ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
2024-05-16  7:36 ` [PATCH " Amit Prakash Shukla
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).