From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 This patch set depends on
 http://patches.dpdk.org/project/dpdk/list/?series=15515

 doc/guides/eventdevs/cnxk.rst            |   4 +
 drivers/event/cnxk/cn10k_eventdev.c      |  76 +++++++++++
 drivers/event/cnxk/cn10k_worker.h        |   4 +
 drivers/event/cnxk/cn9k_eventdev.c       |  82 ++++++++++++
 drivers/event/cnxk/cn9k_worker.h         |   4 +
 drivers/event/cnxk/cnxk_eventdev.h       |  21 +++
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 157 +++++++++++++++++++++++
 7 files changed, 348 insertions(+)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800cc..03dfcbd6a8 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
   time granularity of 2.5us on CN9K and 1us on CN10K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+  RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
 
 Prerequisites and Compilation procedure
 ---------------------------------------

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76c..66040df060 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -407,6 +407,76 @@ cn10k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn10k));
 }
 
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn10k_sso_set_lookup_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+		ws->lookup_mem = lookup_mem;
+	}
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+	const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+	int32_t rx_queue_id,
+	const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+					   queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	lookup_mem = ((struct cn10k_eth_rxq *)eth_dev->data->rx_queues[0])
+			     ->lookup_mem;
+	cn10k_sso_set_lookup_mem(event_dev, lookup_mem);
+	cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+	return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev,
+			       int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct rte_eventdev_ops cn10k_sso_dev_ops = {
 	.dev_infos_get = cn10k_sso_info_get,
 	.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +490,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
 	.port_unlink = cn10k_sso_port_unlink,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.timer_adapter_caps_get = cnxk_tim_caps_get,
 
 	.dump = cnxk_sso_dump,

diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd5..085857bccf 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
 #ifndef __CN10K_WORKER_H__
 #define __CN10K_WORKER_H__
 
+#include "cnxk_ethdev.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
 /* SSO Operations */
 
 static __rte_always_inline uint8_t

diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417eab..8e6bf54df9 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,82 @@ cn9k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn9k));
 }
 
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			     const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn9k_sso_set_lookup_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		if (dev->dual_ws) {
+			struct cn9k_sso_hws_dual *dws =
+				event_dev->data->ports[i];
+			dws->lookup_mem = lookup_mem;
+		} else {
+			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+			ws->lookup_mem = lookup_mem;
+		}
+	}
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+	const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+	int32_t rx_queue_id,
+	const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+					   queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	lookup_mem = ((struct cn9k_eth_rxq *)eth_dev->data->rx_queues[0])
+			     ->lookup_mem;
+	cn9k_sso_set_lookup_mem(event_dev, lookup_mem);
+	cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+	return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev,
+			      int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct rte_eventdev_ops cn9k_sso_dev_ops = {
 	.dev_infos_get = cn9k_sso_info_get,
 	.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +570,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
 	.port_unlink = cn9k_sso_port_unlink,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.timer_adapter_caps_get = cnxk_tim_caps_get,
 
 	.dump = cnxk_sso_dump,

diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb6..f5a4401465 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
 #ifndef __CN9K_WORKER_H__
 #define __CN9K_WORKER_H__
 
+#include "cnxk_ethdev.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
 /* SSO Operations */
 
 static __rte_always_inline uint8_t

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62a..6e0bb8ac5c 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
 #define __CNXK_EVENTDEV_H__
 
 #include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
 #include <rte_kvargs.h>
 #include <rte_mbuf_pool_ops.h>
 #include <rte_pci.h>
@@ -81,7 +83,10 @@ struct cnxk_sso_evdev {
 	uint64_t nb_xaq_cfg;
 	rte_iova_t fc_iova;
 	struct rte_mempool *xaq_pool;
+	uint64_t rx_offloads;
 	uint64_t adptr_xae_cnt;
+	uint16_t rx_adptr_pool_cnt;
+	uint64_t *rx_adptr_pools;
 	uint16_t tim_adptr_ring_cnt;
 	uint16_t *timer_adptr_rings;
 	uint64_t *timer_adptr_sz;
@@ -108,6 +113,7 @@ struct cnxk_sso_evdev {
 struct cn10k_sso_hws {
 	/* Get Work Fastpath data */
 	CN10K_SSO_HWS_OPS;
+	void *lookup_mem;
 	uint32_t gw_wdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
@@ -132,6 +138,7 @@ struct cn10k_sso_hws {
 struct cn9k_sso_hws {
 	/* Get Work Fastpath data */
 	CN9K_SSO_HWS_OPS;
+	void *lookup_mem;
 	uint8_t swtag_req;
 	uint8_t hws_id;
 	/* Add Work Fastpath data */
@@ -148,6 +155,7 @@ struct cn9k_sso_hws_state {
 struct cn9k_sso_hws_dual {
 	/* Get Work Fastpath data */
 	struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+	void *lookup_mem;
 	uint8_t swtag_req;
 	uint8_t vws; /* Ping pong bit */
 	uint8_t hws_id;
@@ -250,4 +258,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
 /* CN9K */
 void cn9k_sso_set_rsrc(void *arg);
 
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+	const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+	int32_t rx_queue_id,
+	const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+				  const struct rte_eth_dev *eth_dev,
+				  int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+			     const struct rte_eth_dev *eth_dev);
+
 #endif /* __CNXK_EVENTDEV_H__ */

diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c14..8de7b6f895 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "cnxk_ethdev.h"
 #include "cnxk_eventdev.h"
 
 void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
 	int i;
 
 	switch (event_type) {
+	case RTE_EVENT_TYPE_ETHDEV: {
+		struct cnxk_eth_rxq_sp *rxq = data;
+		uint64_t *old_ptr;
+
+		for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+			if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+				return;
+		}
+
+		dev->rx_adptr_pool_cnt++;
+		old_ptr = dev->rx_adptr_pools;
+		dev->rx_adptr_pools = rte_realloc(
+			dev->rx_adptr_pools,
+			sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+		if (dev->rx_adptr_pools == NULL) {
+			dev->adptr_xae_cnt += rxq->qconf.mp->size;
+			dev->rx_adptr_pools = old_ptr;
+			dev->rx_adptr_pool_cnt--;
+			return;
+		}
+		dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+			(uint64_t)rxq->qconf.mp;
+
+		dev->adptr_xae_cnt += rxq->qconf.mp->size;
+		break;
+	}
 	case RTE_EVENT_TYPE_TIMER: {
 		struct cnxk_tim_ring *timr = data;
 		uint16_t *old_ring_ptr;
@@ -65,3 +92,133 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
 		break;
 	}
 }
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+		    uint16_t port_id, const struct rte_event *ev,
+		    uint8_t custom_flowid)
+{
+	struct roc_nix_rq *rq;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	rq->sso_ena = 1;
+	rq->tt = ev->sched_type;
+	rq->hwgrp = ev->queue_id;
+	rq->flow_tag_width = 20;
+	rq->wqe_skip = 1;
+	rq->tag_mask = (port_id & 0xF) << 20;
+	rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+			<< 24;
+
+	if (custom_flowid) {
+		rq->flow_tag_width = 0;
+		rq->tag_mask |= ev->flow_id;
+	}
+
+	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+	struct roc_nix_rq *rq;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	rq->sso_ena = 0;
+	rq->flow_tag_width = 32;
+	rq->tag_mask = 0;
+
+	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+	const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+	int32_t rx_queue_id,
+	const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+			rxq_sp = eth_dev->data->rx_queues[i];
+			rxq_sp = rxq_sp - 1;
+			cnxk_sso_updt_xae_cnt(dev, rxq_sp,
+					      RTE_EVENT_TYPE_ETHDEV);
+			rc = cnxk_sso_xae_reconfigure(
+				(struct rte_eventdev *)(uintptr_t)event_dev);
+			rc |= cnxk_sso_rxq_enable(
+				cnxk_eth_dev, i, port, &queue_conf->ev,
+				!!(queue_conf->rx_queue_flags &
+				   RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID));
+		}
+	} else {
+		rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+		rxq_sp = rxq_sp - 1;
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure(
+			(struct rte_eventdev *)(uintptr_t)event_dev);
+		rc |= cnxk_sso_rxq_enable(
+			cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+			&queue_conf->ev,
+			!!(queue_conf->rx_queue_flags &
+			   RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID));
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+	return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev,
+			      int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *dev = eth_dev->data->dev_private;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc = cnxk_sso_rxq_disable(dev, i);
+	} else {
+		rc = cnxk_sso_rxq_disable(dev, (uint16_t)rx_queue_id);
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+			eth_dev->data->port_id, rx_queue_id);
+
+	return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+			  const struct rte_eth_dev *eth_dev)
+{
+	RTE_SET_USED(event_dev);
+	RTE_SET_USED(eth_dev);
+
+	return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+			 const struct rte_eth_dev *eth_dev)
+{
+	RTE_SET_USED(event_dev);
+	RTE_SET_USED(eth_dev);
+
+	return 0;
+}
--
2.17.1
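For reviewers new to the adapter flow: nothing in this patch is called directly by applications. The ops registered above are driven through the public rte_event_eth_rx_adapter API. A minimal application-side sketch follows; the device ids, adapter id, event queue and port configuration used here are placeholders for illustration, not values defined by this series, and most error handling is trimmed:

#include <errno.h>
#include <string.h>
#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

#define EVDEV_ID   0 /* placeholder eventdev id */
#define ETH_PORT   0 /* placeholder ethdev port id */
#define ADAPTER_ID 0 /* placeholder adapter id */

static int
app_setup_rx_adapter(struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	/* With this PMD the driver reports the INTERNAL_PORT capability,
	 * so no service core is needed to move packets into the SSO.
	 */
	rc = rte_event_eth_rx_adapter_caps_get(EVDEV_ID, ETH_PORT, &caps);
	if (rc < 0)
		return rc;
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	rc = rte_event_eth_rx_adapter_create(ADAPTER_ID, EVDEV_ID, port_conf);
	if (rc < 0)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = 0; /* target event queue */
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;

	/* An rx_queue_id of -1 maps every Rx queue of the port: the N:1
	 * Rx queue to event queue mapping noted in the documentation.
	 */
	return rte_event_eth_rx_adapter_queue_add(ADAPTER_ID, ETH_PORT, -1,
						  &qconf);
}

The queue_add path above then marks the NIX RQ as SSO-enabled and encodes the ethdev port id together with RTE_EVENT_TYPE_ETHDEV into the RQ tag mask (see cnxk_sso_rxq_enable()), which is what lets the dequeue side recognize Rx adapter traffic later.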
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 115 ++++++++- drivers/event/cnxk/cn10k_worker.c | 164 +++++++++---- drivers/event/cnxk/cn10k_worker.h | 91 +++++-- drivers/event/cnxk/cn9k_eventdev.c | 254 ++++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 364 +++++++++++++++++++--------- drivers/event/cnxk/cn9k_worker.h | 158 +++++++++--- drivers/event/cnxk/meson.build | 7 + 7 files changed, 931 insertions(+), 222 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 66040df060..b1ad5b2878 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -247,17 +247,120 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_seg_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if 
(dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c64..4365aec992 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,56 +60,118 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); - return 1; +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t 
timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_tmo_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_tmo_deq_seg_##name(port, ev, \ + timeout_ticks); \ } - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 085857bccf..ad320d2dc0 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -83,20 +83,40 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + union mbuf_initializer mbuf_init = { + .fields = {.data_off = RTE_PKTMBUF_HEADROOM, + .refcnt = 1, + .nb_segs = 1, + .port = port_id}, + }; + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init.value, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const 
uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->getwrk_op) : "memory"); #else @@ -104,11 +124,25 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) do { roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -123,6 +157,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -133,19 +168,34 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_wqe_op) : "memory"); #else do { roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -164,16 +214,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst_##name( \ + 
void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 8e6bf54df9..16acea4cda 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,179 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_seg_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_tmo_deq[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_tmo_deq_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, 
flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_seg_burst[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_tmo_deq_seg[2][2][2][2] = { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_tmo_deq_seg_burst[2][2][2][2] = + { +#define R(name, f3, f2, f1, f0, flags) \ + [f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +434,82 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = 
cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98dd..0f031a5fa3 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,59 +60,121 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct 
rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_tmo_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_tmo_deq_seg_##name(port, ev, \ + timeout_ticks); \ } - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} +NIX_RX_FASTPATH_MODES +#undef R /* Dual ws ops. 
*/ @@ -172,65 +234,145 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, \ + flags, dws->lookup_mem); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_tmo_deq_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, \ + flags, dws->lookup_mem); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], \ + 
&dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_tmo_deq_seg_##name(port, ev, \ + timeout_ticks); \ } - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a4401465..1fde652ff8 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,38 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + union mbuf_initializer mbuf_init = { + .fields = {.data_off = RTE_PKTMBUF_HEADROOM, + .refcnt = 1, + .nb_segs = 1, + .port = port_id}, + }; + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init.value, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +168,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] 
"r"(ws_pair->getwrk_op)); #else @@ -156,12 +180,26 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +207,21 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. */ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +233,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +244,26 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +278,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +291,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +301,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif 
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +350,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot 
cn9k_sso_hws_dual_tmo_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a9..0a3bcffd64 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -8,6 +8,13 @@ if not is_linux or not dpdk_conf.get('RTE_ARCH_64') subdir_done() endif +extra_flags = ['-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', -- 2.17.1
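The R() macro above is an X-macro: NIX_RX_FASTPATH_MODES expands it once to declare one specialized dequeue function per Rx offload combination, and the driver expands it again to build a function table indexed by the normalized (!!) offload bits. Below is a minimal, compilable sketch of that pattern; the names OFF_CSUM, OFF_RSS and deq_fns are invented stand-ins, not driver symbols.

#include <stdint.h>
#include <stdio.h>

/* Toy offload bits standing in for the NIX_RX_OFFLOAD_*_F flags. */
#define OFF_CSUM (1u << 0)
#define OFF_RSS  (1u << 1)

/* One entry per flag combination, mirroring NIX_RX_FASTPATH_MODES. */
#define FASTPATH_MODES          \
        R(no_offload, 0, 0, 0)  \
        R(rss, 0, 1, OFF_RSS)   \
        R(csum, 1, 0, OFF_CSUM) \
        R(csum_rss, 1, 1, OFF_CSUM | OFF_RSS)

/* First expansion: emit one specialized function per mode. 'flags' is a
 * compile-time constant inside each body. */
#define R(name, f1, f0, flags) \
        static uint16_t deq_##name(void) { return (uint16_t)(flags); }
FASTPATH_MODES
#undef R

int main(void)
{
        /* Second expansion: build the table indexed by normalized bits. */
        static uint16_t (*const deq_fns[2][2])(void) = {
#define R(name, f1, f0, flags) [f1][f0] = deq_##name,
                FASTPATH_MODES
#undef R
        };
        uint32_t rx_offloads = OFF_CSUM | OFF_RSS;

        printf("selected mode flags: %u\n",
               (unsigned)deq_fns[!!(rx_offloads & OFF_CSUM)]
                                [!!(rx_offloads & OFF_RSS)]());
        return 0;
}

Because the flags are compile-time constants in each generated body, the compiler can eliminate the offload branches a given mode cannot take, which is why the driver prefers many specialized functions over one generic receive loop.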
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- drivers/event/cnxk/cn10k_eventdev.c | 90 +++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 22 ++++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++ 5 files changed, 335 insertions(+), 4 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 03dfcbd6a8..502bac17e0 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index b1ad5b2878..99d2b7a8ba 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -243,6 +243,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -482,6 +515,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -580,6 +617,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc 
= cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -599,6 +685,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 16acea4cda..2f071f19ea 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -683,6 +743,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -787,6 +851,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev 
*eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -806,6 +919,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 6e0bb8ac5c..57c3327aa0 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -84,9 +85,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -121,8 +125,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -145,7 +151,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -163,7 +171,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -270,5 +280,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev 
*event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 8de7b6f895..d4d07c793f 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -5,6 +5,8 @@ #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" +#define CNXK_SSO_SQB_LIMIT (0x180) + void cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, uint32_t event_type) @@ -222,3 +224,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + uint16_t sqb_limit; + + sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs); + return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, txq); + if (ret < 0) + return ret; + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + } + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, + NULL); + if (ret < 0) + return ret; + } + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) 
+ return ret; + } + + return 0; +} -- 2.17.1
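cnxk_sso_updt_tx_queue_data() above keeps the adapter's txq pointers in one flat allocation that is reindexed as a [port][queue] matrix through a pointer-to-array cast, growing it with rte_realloc_socket() whenever a higher port id shows up. A simplified sketch of that layout using plain libc calls; MAX_QUEUES_PER_PORT and the helper name here are stand-ins, not driver symbols.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_QUEUES_PER_PORT 8 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

/* Grow-on-demand flat [port][queue] table of txq pointers, shaped like
 * cnxk_sso_updt_tx_queue_data(): one row per ethdev port. */
static int
updt_tx_queue_data(uint64_t **tbl, uint16_t *max_port, uint16_t port,
                   uint16_t queue, void *txq)
{
        if (*tbl == NULL || port > *max_port) {
                uint16_t new_max = port > *max_port ? port : *max_port;
                uint64_t *p = realloc(*tbl, sizeof(uint64_t) *
                                      (new_max + 1) * MAX_QUEUES_PER_PORT);

                if (p == NULL)
                        return -1;
                *tbl = p;
                *max_port = new_max;
        }
        /* The pointer-to-array cast makes [port][queue] compute the flat
         * offset port * MAX_QUEUES_PER_PORT + queue, as the driver does. */
        ((uint64_t(*)[MAX_QUEUES_PER_PORT])*tbl)[port][queue] =
                (uint64_t)(uintptr_t)txq;
        return 0;
}

int main(void)
{
        uint64_t *tbl = NULL;
        uint16_t max_port = 0;
        int dummy_txq;

        if (updt_tx_queue_data(&tbl, &max_port, 3, 5, &dummy_txq) != 0)
                return 1;
        printf("txq[3][5] = %p\n", (void *)(uintptr_t)(
               (uint64_t(*)[MAX_QUEUES_PER_PORT])tbl)[3][5]);
        free(tbl);
        return 0;
}

Keeping the table flat is what lets the per-port copy into each HWS be a single memcpy of (max_port + 1) * RTE_MAX_QUEUES_PER_PORT words, as done in cn9k/cn10k_sso_updt_tx_adptr_data().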
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 35 ++++++++++++ drivers/event/cnxk/cn10k_worker.c | 32 +++++++++++ drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 76 +++++++++++++++++++++++++ drivers/event/cnxk/cn9k_worker.c | 60 ++++++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++++++++++++ 6 files changed, 357 insertions(+) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 99d2b7a8ba..817cb08480 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -336,6 +336,22 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue sso_hws_tx_adptr_enq[2][2][2][2][2] = { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue sso_hws_tx_adptr_enq_seg[2][2][2][2][2] = + { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -395,6 +411,25 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 4365aec992..fb26e17034 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -175,3 +175,35 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], NIX_RX_FASTPATH_MODES #undef R + +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ 
+ ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index ad320d2dc0..b3f71202ad 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ -239,4 +240,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 2f071f19ea..a1206dcb61 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -427,6 +427,38 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue sso_hws_tx_adptr_enq[2][2][2][2][2] = { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue sso_hws_tx_adptr_enq_seg[2][2][2][2][2] = + { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2] = { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = 
cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2] = { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -487,6 +519,23 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -567,8 +616,35 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 0f031a5fa3..0ffeeeb93a 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -376,3 +376,63 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], NIX_RX_FASTPATH_MODES #undef R + +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | 
NIX_TX_MULTI_SEG_F); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 1fde652ff8..9ffb8df5b8 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -400,4 +401,90 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!ev->sched_type) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!ev->sched_type) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif -- 2.17.1
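The ordering protocol in cn9k_sso_hws_event_tx() (and its cn10k counterpart) is: prepare the send descriptor first, wait to become head of the flow only for scheduled events so that Tx order matches Rx order, submit the LMT line, and finally flush the software tag to release the flow, unless fast free is disabled and the mbuf still has other references. A schematic sketch with stubbed hardware primitives follows; head_wait, submit_lmt and swtag_flush are invented placeholders for the real LMT/MMIO operations.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stubbed hardware primitives; the real driver issues LMT/MMIO ops here. */
static void head_wait(void)   { puts("spin until this HWS is flow head"); }
static void submit_lmt(void)  { puts("steorl: ring NIX doorbell"); }
static void swtag_flush(void) { puts("flush tag: release the flow"); }

struct ev { uint8_t sched_type; /* 0 == SSO-scheduled (ordered/atomic) */ };

/* Shape of cn9k/cn10k_sso_hws_event_tx(): prepare the descriptor, wait to
 * become flow head only for scheduled events (preserving Rx order on Tx),
 * submit, then flush the tag so the next event of the flow can proceed. */
static uint16_t event_tx(const struct ev *e, bool mbuf_has_extra_refs)
{
        /* ... send descriptor preparation would go here ... */
        if (e->sched_type == 0)
                head_wait();
        submit_lmt();
        /* The driver returns without the tag flush when fast free is off
         * and the mbuf refcount is above one. */
        if (mbuf_has_extra_refs)
                return 1;
        swtag_flush();
        return 1;
}

int main(void)
{
        struct ev e = { .sched_type = 0 };

        return event_tx(&e, false) == 1 ? 0 : 1;
}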
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add a multi-segment Rx vector routine: the primary mbufs are formed on the vector path, and the code switches to the scalar path only to extract the segment chains. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394 drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ drivers/net/cnxk/meson.build | 2 ++ 7 files changed, 157 insertions(+), 44 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 5c956c06b4..3a9fd71309 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt.
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 1cc37cbaa0..5926ff7f46 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c new file mode 100644 index 0000000000..04d1e46c82 --- /dev/null +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index 0acedd0a1f..d293d4eac3 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 10ef5c6905..5ae9e8195c 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c new file mode 100644 index 0000000000..e46d8a4749 --- /dev/null +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | \ + NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index 2071d0dcb2..aa8c7253fb 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx.c', 'cn9k_rx_mseg.c', 'cn9k_rx_vec.c', + 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', 'cn9k_tx_vec.c') @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx.c', 'cn10k_rx_mseg.c', 'cn10k_rx_vec.c', + 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', 'cn10k_tx_vec.c') -- 2.17.1
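In both the scalar and the new vector multi-segment paths, nix_cqe_xtract_mseg() unpacks the NIX SG word: the segment count sits in bits [49:48] and up to three 16-bit buffer lengths in bits [47:0], consumed by shifting the word right 16 bits per segment. A standalone sketch of that unpacking follows, assuming a toy struct seg in place of rte_mbuf and ignoring the buffer addresses and the eol walk over further SG entries.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal mbuf stand-in; only the chaining fields matter here. */
struct seg {
        struct seg *next;
        uint16_t data_len;
};

/* Shape of the nix_cqe_xtract_mseg() unpacking: lengths are peeled off
 * the SG word 16 bits at a time and the segments chained via 'next'. */
static void
extract_mseg(uint64_t sg, struct seg *pool, size_t pool_len)
{
        uint64_t nb_segs = (sg >> 48) & 0x3;
        size_t i;

        for (i = 0; i < nb_segs && i < pool_len; i++) {
                pool[i].data_len = (uint16_t)(sg & 0xFFFF);
                pool[i].next = (i + 1 < nb_segs) ? &pool[i + 1] : NULL;
                sg >>= 16;
        }
}

int main(void)
{
        /* Three segments of 1500, 1500 and 64 bytes. */
        uint64_t sg = (3ULL << 48) | (64ULL << 32) | (1500ULL << 16) | 1500ULL;
        struct seg segs[3];

        extract_mseg(sg, segs, 3);
        printf("%u %u %u\n", segs[0].data_len, segs[1].data_len,
               segs[2].data_len);
        return 0;
}

This also shows why the vector routine can hand off to scalar cheaply: once the four primary mbufs are formed with NEON, each CQE's SG word is an independent 64-bit value that the scalar helper can walk without disturbing the vector registers.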
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in the vector Rx burst function: mbufs are processed on the vector path, and the code switches to scalar only when extracting the timestamp. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_ethdev.c | 1 - drivers/net/cnxk/cn10k_rx.c | 5 +- drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_rx_vec.c | 3 - drivers/net/cnxk/cn9k_ethdev.c | 1 - drivers/net/cnxk/cn9k_rx.c | 5 +- drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_rx_vec.c | 3 - drivers/net/cnxk/cnxk_ethdev.h | 19 ++--- 9 files changed, 232 insertions(+), 53 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index b079edbd35..7caec6cf14 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 3a9fd71309..69e767ac3d 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 5926ff7f46..abdd58e888 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Substract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 65ffa97841..93528a44f9 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -11,9 +11,6 @@ struct rte_mbuf **rx_pkts, \ uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 107a540915..cb302b75d8 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index d293d4eac3..7d9f1bd61f 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 5ae9e8195c..dd3e5d3c7e 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Substract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c index e61c2225c6..ef5f771ef7 100644 --- a/drivers/net/cnxk/cn9k_rx_vec.c +++ b/drivers/net/cnxk/cn9k_rx_vec.c @@ -9,9 +9,6 @@ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 67b1f42531..4eead03905 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -136,13 +136,12 @@ struct cnxk_eth_qconf { }; struct cnxk_timesync_info { + uint8_t rx_ready; + uint64_t rx_tstamp; uint64_t rx_tstamp_dynflag; + int tstamp_dynfield_offset; rte_iova_t tx_tstamp_iova; uint64_t *tx_tstamp; - uint64_t rx_tstamp; - int tstamp_dynfield_offset; - uint8_t tx_ready; - uint8_t rx_ready; } __plt_cache_aligned; struct cnxk_eth_dev { @@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf, static __rte_always_inline void cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, - struct cnxk_timesync_info *tstamp, bool ts_enable, + struct cnxk_timesync_info *tstamp, + const uint8_t ts_enable, const uint8_t mseg_enable, uint64_t *tstamp_ptr) { - if (ts_enable && - (mbuf->data_off == - RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) { - mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + if (ts_enable) { + if (!mseg_enable) { + mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + } /* Reading the rx timestamp inserted by CGX, viz at * starting of the packet data. -- 2.17.1
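Two scalar details are worth calling out from the vector timestamp hunk above: the raw timestamp sits big-endian at the head of the packet (hence the vrev64q_u8() byte reversal), and ts[31 - __builtin_clz(res)] picks the highest set bit of the match mask, i.e. the last PTP packet of the batch, whose timestamp is the most recent. A scalar rendering of both follows; the values are illustrative only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Timestamps gathered for a batch of four packets, plus a 4-bit
         * mask of which of them matched RTE_PTYPE_L2_ETHER_TIMESYNC. */
        uint64_t ts[4] = {100, 200, 300, 400};
        uint8_t res = 0x5; /* packets 0 and 2 matched */

        if (res) {
                /* 31 - clz(res) is the highest set bit: the last matching
                 * packet of the batch carries the most recent timestamp. */
                uint64_t latest = ts[31 - __builtin_clz(res)];

                printf("latest rx timestamp: %llu\n",
                       (unsigned long long)latest);
        }

        /* The on-wire timestamp is big-endian at the packet head; this is
         * the scalar equivalent of the vrev64q_u8() lane reversal. */
        printf("%llx\n",
               (unsigned long long)__builtin_bswap64(0x0102030405060708ULL));
        return 0;
}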
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable VLAN offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 3 +- drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_tx_vec.c | 3 +- drivers/net/cnxk/cn9k_tx.c | 3 +- drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 3 +- 6 files changed, 227 insertions(+), 38 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 18694dc704..05bc163a40 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8b1446f25c..1e16978584 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_pkts_per_vec_brst(const uint16_t flags) { - RTE_SET_USED(flags); - /* We can pack up to 4 packets per LMTLINE if there are no offloads. */ - return 4 << ROC_LMT_LINES_PER_CORE_LOG2; + return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4) + << ROC_LMT_LINES_PER_CORE_LOG2; +} + +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line(const uint16_t flags) +{ + return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; } static __rte_always_inline uint64_t @@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags) static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { - const uint64_t dw_m1 = 0x7; + const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1; uint64_t data; - RTE_SET_USED(flags); /* This will be moved to addr area */ data = dw_m1; /* 15 vector sizes for single seg */ @@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0); sgdesc23_w0 = sgdesc01_w0; + /* Load command defaults into vector variables. 
*/ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + } + /* Get LMT base address and LMT ID as lcore id */ ROC_LMT_BASE_ID_GET(laddr, lmt_id); left = pkts; @@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); - lnum += 1; + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + lnum += 1; + } else { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); + lnum += 1; + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 7453f3bc98..beb5c649bb 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index b802606075..4b43cdaff9 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 1899d6670f..d5715bb52d 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -552,10 +552,13 @@ 
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; senddesc01_w1 = vdupq_n_u64(0); senddesc23_w1 = senddesc01_w1; - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); - sgdesc23_w0 = sgdesc01_w0; + + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); + sgdesc23_w0 = sgdesc01_w0; + } else { + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sgdesc23_w0 = sgdesc01_w0; + } for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) { /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ @@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd1[0]); - vst1q_u64(lmt_addr + 4, cmd0[1]); - vst1q_u64(lmt_addr + 6, cmd1[1]); - vst1q_u64(lmt_addr + 8, cmd0[2]); - vst1q_u64(lmt_addr + 10, cmd1[2]); - vst1q_u64(lmt_addr + 12, cmd0[3]); - vst1q_u64(lmt_addr + 14, cmd1[3]); - lmt_status = roc_lmt_submit_ldeor(io_addr); - } while (lmt_status == 0); + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* With ext header in the command we can no longer send + * all 4 packets together since LMTLINE is 128bytes. + * Split and Tx twice. + */ + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + do { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } else { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index a6e7c9e542..5842facb58 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ -- 2.17.1
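The vector path ORs both TCIs into SEND_EXT_W1 unconditionally and uses the vtstq_u64() masks only to set the per-VLAN insert-enable bits, so the TCI writes stay branch-free. Per packet, the equivalent scalar construction is roughly the sketch below; the bit positions (insert pointers defaulting to byte 12, outer/VLAN0 TCI at bits 8..23 with enable bit 48, inner/VLAN1 TCI at bits 32..47 with enable bit 49) are taken from the hunks above, while the helper name is made up for illustration.

static inline uint64_t
nix_ext_w1_vlan_sketch(const struct rte_mbuf *m)
{
	/* Both VLANs are inserted after the 12-byte DMAC+SMAC, matching
	 * the vdupq_n_u64(12 | 12U << 24) default above.
	 */
	uint64_t w1 = 12 | 12U << 24;

	if (m->ol_flags & PKT_TX_VLAN) {
		w1 |= (uint64_t)m->vlan_tci << 32; /* VLAN1 (inner) TCI */
		w1 |= BIT_ULL(49);                 /* VLAN1 insert enable */
	}
	if (m->ol_flags & PKT_TX_QINQ) {
		w1 |= (uint64_t)m->vlan_tci_outer << 8; /* VLAN0 (outer) TCI */
		w1 |= BIT_ULL(48);                      /* VLAN0 insert enable */
	}
	return w1;
}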
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in vector Tx burst function. Since we can no longer use a single LMT line for a burst of 4, split the LMT line into two and transmit twice. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 4 +- drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++----- drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 4 +- drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 192 insertions(+), 40 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 05bc163a40..c4c3e65704 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 1e16978584..d5812c5c28 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_tx_dwords_per_line(const uint16_t flags) { - return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; + return (flags & NIX_TX_NEED_EXT_HDR) ? + ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : + 8; } static __rte_always_inline uint64_t @@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = sendext01_w0; sendext01_w1 = vdupq_n_u64(12 | 12U << 24); sendext23_w1 = sendext01_w1; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]); + sendmem23_w1 = sendmem01_w1; + } } /* Get LMT base address and LMT ID as lcore id */ @@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB. */ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default.
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate an + * inverted mask as we need not make any changes to + * the default cmd value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); - lnum += 1; - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]); + } else { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); +
vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + } lnum += 1; } else { /* Store the prepared send desc to LMT lines */ diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index beb5c649bb..0b4a4c7bae 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 4b43cdaff9..c32681ed44 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index d5715bb52d..bfb34abb23 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); sgdesc23_w0 = sgdesc01_w0; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]); + sendmem23_w1 = sendmem01_w1; + } } else { sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); sgdesc23_w0 = sgdesc01_w0; @@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB*/ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default. 
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate an + * inverted mask as we need not make any changes to + * the default cmd value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * Split and Tx twice.
*/ do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd2[0]); - vst1q_u64(lmt_addr + 4, cmd1[0]); - vst1q_u64(lmt_addr + 6, cmd0[1]); - vst1q_u64(lmt_addr + 8, cmd2[1]); - vst1q_u64(lmt_addr + 10, cmd1[1]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd3[0]); + vst1q_u64(lmt_addr + 8, cmd0[1]); + vst1q_u64(lmt_addr + 10, cmd2[1]); + vst1q_u64(lmt_addr + 12, cmd1[1]); + vst1q_u64(lmt_addr + 14, cmd3[1]); + } else { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); do { - vst1q_u64(lmt_addr, cmd0[2]); - vst1q_u64(lmt_addr + 2, cmd2[2]); - vst1q_u64(lmt_addr + 4, cmd1[2]); - vst1q_u64(lmt_addr + 6, cmd0[3]); - vst1q_u64(lmt_addr + 8, cmd2[3]); - vst1q_u64(lmt_addr + 10, cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd3[2]); + vst1q_u64(lmt_addr + 8, cmd0[3]); + vst1q_u64(lmt_addr + 10, cmd2[3]); + vst1q_u64(lmt_addr + 12, cmd1[3]); + vst1q_u64(lmt_addr + 14, cmd3[3]); + } else { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); } else { diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 5842facb58..9ade66db2b 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
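Note how the patch keeps the burst loop branch-free: a SEND_MEM sub-descriptor is emitted for every packet, and for packets that did not request PKT_TX_IEEE1588_TMST it is neutralized rather than skipped. A scalar sketch of that neutralization, with the bit position and the +8 offset taken from the hunks above and hypothetical parameter names:

static inline void
nix_send_mem_sketch(uint64_t ol_flags, uint64_t *w0, uint64_t *w1)
{
	/* w0/w1 arrive holding the SETTSTMP defaults from txq->cmd[]. */
	if (!(ol_flags & PKT_TX_IEEE1588_TMST)) {
		/* Flip the ALG field (bit 59) from SETTSTMP to SUB so the
		 * hardware performs no timestamp capture for this packet.
		 */
		*w0 |= BIT_ULL(59);
		/* Point the write 8 bytes past the real Tx timestamp so
		 * the memory update lands in a scratch word instead.
		 */
		*w1 += 8;
	}
}

The vector code derives the "timestamp not requested" condition with vmvnq_u32(vtstq_u64(...)) and applies both adjustments with plain AND/OR/ADD, so the common no-PTP case costs only a few extra vector ops per group of four packets.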
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable TSO offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 2 +- drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index c4c3e65704..d06879163f 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index d5812c5c28..cea7c6cd34 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + const uint64_t flags, const uint64_t lso_tun_fmt) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + uint8_t shift = is_udp_tun ? 32 : 0; + + shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4); + shift += (!!(ol_flags & PKT_TX_IPV6) << 3); + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + + w0->lso_format = (lso_tun_fmt >> shift); + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0); senddesc23_w0 = senddesc01_w0; @@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. 
*/ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + const uint64_t lso_fmt = txq->lso_tun_fmt; + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. */ + cn10k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags, lso_fmt); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 0b4a4c7bae..34e3737501 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index c32681ed44..735e21cc60 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index bfb34abb23..2adff45705 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + uint64_t flags) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + 
w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + w0->lso_format += is_udp_tun ? 2 : 6; + + w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1; + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } + /* Lets commit any changes in the packet here as no further changes * to the packet will be done unless no fast free is enabled. */ @@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. */ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. 
*/ + cn9k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags); + + cn9k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 9ade66db2b..56a3e2514a 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
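For cn9k the tunnel LSO profile is computed as an offset from the plain TSO formats (cn10k instead reads a pre-computed index from txq->lso_tun_fmt). The index arithmetic in cn9k_nix_prepare_tso() above can be isolated into a standalone sketch, assuming the LSO format table layout implied by those offsets; the helper name is illustrative.

static inline uint8_t
nix_lso_format_sketch(uint64_t ol_flags)
{
	/* TSOv4 and TSOv6 formats are adjacent entries. */
	uint8_t fmt = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);

	if (ol_flags & PKT_TX_TUNNEL_MASK) {
		/* Tunnel-type bits of ol_flags start at bit 45. */
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
			0x1;

		/* UDP tunnel formats sit 2 entries past the TSO pair,
		 * other tunnels 6; outer IPv6 variants one pair later.
		 */
		fmt += is_udp_tun ? 2 : 6;
		fmt += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
	}
	return fmt;
}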
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi segment Tx vector routine. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 20 +- drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++-- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/cn9k_tx.c | 20 +- drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++- drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/meson.build | 6 +- 7 files changed, 709 insertions(+), 45 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index d06879163f..1f30bab59a 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index cea7c6cd34..b25b20dcb2 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -42,6 +42,13 @@ } \ } while (0) +/* Encoded number of segments to number of dwords macro, each value of nb_segs + * is encoded as 4bits. + */ +#define NIX_SEGDW_MAGIC 0x76654432210ULL + +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) + #define LMT_OFF(lmt_addr, lmt_num, offset) \ (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags) return data; } +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags) +{ + return ((flags & NIX_TX_NEED_EXT_HDR) ? + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 
8 : 6 : + 4); +} + static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; +} + +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint8_t segdw, + const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return; + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + sh.sizem1 = segdw - 1; + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline uint8_t +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, + uint64x2_t *cmd1, uint64x2_t *cmd2, + uint64x2_t *cmd3, uint8_t *segdw, + uint64_t *lmt_addr, __uint128_t *data128, + uint8_t *shift, const uint16_t flags) +{ + uint8_t j, off, lmt_used; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. 
*/ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + + *data128 |= ((__uint128_t)7) << *shift; + *shift += 3; + + return 1; + } + } + + lmt_used = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. */ + if ((segdw[j] + segdw[j + 1]) <= 8) { + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + /* TSTAMP takes 4 each, no segs. */ + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + vst1q_u64(lmt_addr + 6, cmd3[j]); + + vst1q_u64(lmt_addr + 8, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12, cmd1[j + 1]); + vst1q_u64(lmt_addr + 14, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + /* EXT header takes 3 each, space for 2 segs. */ + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 3; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 12 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + off = segdw[j] - 2; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 8 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + } + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1) + << *shift; + *shift += 3; + j += 2; + } else { + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 4; + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + } + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift; + *shift += 3; + j++; + } + lmt_used++; + lmt_addr += 16; + } + + return lmt_used; +} + static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
struct rte_mbuf **tx_pkts, uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sendmem01_w0, sendmem23_w0; uint64x2_t sendmem01_w1, sendmem23_w1; + uint8_t segdw[NIX_DESCS_PER_LOOP + 1]; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t ltypes01, ltypes23; uint64x2_t xtmp128, ytmp128; uint64x2_t xmask01, xmask23; - uint8_t lnum; + uint8_t lnum, shift; + union wdata { + __uint128_t data128; + uint64_t data[2]; + } wd; NIX_XMIT_FC_OR_RETURN(txq, pkts); @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, burst = left > cn10k_nix_pkts_per_vec_brst(flags) ? cn10k_nix_pkts_per_vec_brst(flags) : left; + if (flags & NIX_TX_MULTI_SEG_F) { + wd.data128 = 0; + shift = 16; + } lnum = 0; + for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + struct rte_mbuf *m = tx_pkts[j]; + + /* Get dwords based on nb_segs. */ + segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs); + /* Add dwords based on offloads. */ + segdw[j] += 1 + /* SEND HDR */ !!(flags & NIX_TX_NEED_EXT_HDR) + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + /* Check if there are enough LMTLINES for this loop */ + if (lnum + 4 > 32) { + uint8_t ldwords_con = 0, lneeded = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + ldwords_con += segdw[j]; + if (ldwords_con > 8) { + lneeded += 1; + ldwords_con = segdw[j]; + } + } + lneeded += 1; + if (lnum + lneeded > 32) { + burst = i; + break; + } + } + } /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ senddesc01_w0 = vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF)); @@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, (void **)&mbuf3, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + segdw[4] = 8; + j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1, + cmd2, cmd3, segdw, + (uint64_t *) + LMT_OFF(laddr, lnum, + 0), + &wd.data128, &shift, + flags); + lnum += j; + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { vst1q_u64(LMT_OFF(laddr,
lnum, 0), cmd0[0]); @@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] >>= 16; + /* Trigger LMTST */ if (lnum > 16) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= (15ULL << 12); - data |= (uint64_t)lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= (15ULL << 12); + wd.data[0] |= (uint64_t)lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 17)) << 12; - data |= (uint64_t)(lmt_id + 16); + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[1] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[1] & 0x7) << 4; + wd.data[1] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[1] <<= 16; + + wd.data[1] |= ((uint64_t)(lnum - 17)) << 12; + wd.data[1] |= (uint64_t)(lmt_id + 16); /* STEOR1 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[1], pa); } else if (lnum) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 1)) << 12; - data |= lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= ((uint64_t)(lnum - 1)) << 12; + wd.data[0] |= lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); } left -= burst; @@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (left) goto again; - if (unlikely(scalar)) - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd, - flags); + if (unlikely(scalar)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + scalar, cmd, flags); + else + pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, + cmd, flags); + } return pkts; } @@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \ - void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ NIX_TX_FASTPATH_MODES #undef T diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c new file mode 100644 index 0000000000..1fad81dbad --- /dev/null +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn10k_nix_xmit_pkts_vector( \ + tx_queue, tx_pkts, pkts, cmd, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 735e21cc60..763f9a14fd 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 2adff45705..42b54a378e 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + uint64_t segdw; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; + segdw = (uint64_t *)slist - (uint64_t *)&cmd[0]; + + 
segdw += 2; + /* Roundup extra dwords to multiple of 2 */ + segdw = (segdw >> 1) + (segdw & 0x1); + /* Default dwords */ + segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + sh->sizem1 = segdw - 1; + + return segdw; +} + +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + uint8_t ret; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + return ret; +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline void +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1, + uint64x2_t *cmd2, uint64x2_t *cmd3, + uint8_t *segdw, + uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2], + uint64_t *lmt_addr, rte_iova_t io_addr, + const uint32_t flags) +{ + uint64_t lmt_status; + uint8_t j, off; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. */ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + return; + } + } + + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. 
*/ + if ((segdw[j] + segdw[j + 1]) <= 8) { +again0: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + + vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 14 + off, + slist[j + 1], segdw[j + 1] - 4); + off += ((segdw[j + 1] - 4) << 1); + vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 12 + off, + slist[j + 1], segdw[j + 1] - 3); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 8 + off, + slist[j + 1], segdw[j + 1] - 2); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again0; + j += 2; + } else { +again1: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again1; + j += 1; + } + } +} + static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * cnxk_nix_prefree_seg are written before LMTST. 
*/ rte_io_wmb(); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint64_t seg_list[NIX_DESCS_PER_LOOP] + [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; + uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; + + /* Build mseg list for each packet individually. */ + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j], + seg_list[j], &cmd0[j], + &cmd1[j], flags); + segdw[4] = 8; + + /* Commit all changes to mbuf before LMTST. */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + rte_io_wmb(); + + cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, + segdw, seg_list, + lmt_addr, io_addr, + flags); + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* With ext header in the command we can no longer send * all 4 packets together since LMTLINE is 128bytes. * Split and Tx twice. @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } - if (unlikely(pkts_left)) - pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, - flags); + if (unlikely(pkts_left)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + pkts_left, cmd, flags); + else + pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, + cmd, flags); + } return pkts; } @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c new file mode 100644 index 0000000000..0256efd45a --- /dev/null +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ + (flags) | \ + NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index aa8c7253fb..361f7ce849 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', - 'cn9k_tx_vec.c') + 'cn9k_tx_vec.c', + 'cn9k_tx_vec_mseg.c') # CN10K sources += files('cn10k_ethdev.c', 'cn10k_rte_flow.c', @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', - 'cn10k_tx_vec.c') + 'cn10k_tx_vec.c', + 'cn10k_tx_vec_mseg.c') deps += ['bus_pci', 'cryptodev', 'eventdev', 'security'] deps += ['common_cnxk', 'mempool_cnxk'] -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize the cn10k workslot fastpath structure to fit within a 64B cache line. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 28 ++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 540 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800cc..b7e82c1273 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Full Rx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +97,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across the ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +173,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to the +event device causes back pressure to be asserted only on the first +ethernet device. +Back pressure is therefore disabled automatically when the same mempool is +used across all the ethernet devices connected to the event device; to +override this, applications can pass the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device is recommended when devices are +connected to the event device. diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 31e49e1a56..3892c8017a 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,6 +60,11 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. +* **Added support for the Marvell CN9K and CN10K event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk.
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb69027956..76613fe84e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f8..f17eba4169 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df442..9dc0c88a6f 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e57..cb1ce4b6fc 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76c..2060c8fe84 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } 
+static int +cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c64..5dbae275ba 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd5..c7250bf9e7 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + 
SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417eab..072800c243 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb6..f5a4401465 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ 
b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a79..cfd7fb971c 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62a..b65d725f55 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct 
rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c14..24bfd985e7 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, + RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, i, port, &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); 
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, (uint16_t)rx_queue_id, port, + &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a9..eda562f5b5 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
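Before the fastpath patch below, it is worth seeing the adapter from the application side. The following is a condensed usage sketch, not taken from the patch, built on the standard rte_event_eth_rx_adapter API; it assumes the ethdev and event device have already been configured and started elsewhere and trims most error handling. With event/cnxk paired with net/cnxk, the caps query reports RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT, so NIX pushes work straight into the SSO and no service core is needed; packets then arrive at worker ports as RTE_EVENT_TYPE_ETHDEV events carrying mbufs.

#include <errno.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
demo_hook_rx_queues(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port,
		    uint8_t ev_queue, struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	/* The adapter allocates its event port internally from port_conf. */
	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, port_conf);
	if (rc)
		return rc;

	/* Paired cnxk devices report INTERNAL_PORT, i.e. the NIX->SSO
	 * path is in hardware; a SW adapter would need a service core.
	 */
	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port, &caps);
	if (rc || !(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return rc ? rc : -ENOTSUP;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	/* To pin a fixed flow id instead of the RSS-derived tag, also set
	 * qconf.ev.flow_id and RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID
	 * in qconf.rx_queue_flags.
	 */

	/* rx_queue_id = -1 maps every Rx queue of the port; this exercises
	 * the rx_queue_id < 0 branch of cnxk_sso_rx_adapter_queue_add().
	 */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adptr_id);
}

Note how this ties into the new rox_nix_fc_npa_bp_cfg() path: each queue_add enables aura back pressure for that Rx queue's mempool, so when several ports share one pool only the first keeps back pressure unless force_rx_bp=1 is passed as a devarg, as documented in the Limitations section above.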
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe84..ba7d95fff7 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275ba..c71aa37327 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e7..b724083caa 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 0000000000..36ec454ccc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 0000000000..29ecc551cf --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 0000000000..c8524a27bd --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c243..e386cb784a 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98dd..538bc4b0b3 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a4401465..c01c00e1da 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 0000000000..51ccaf4ec4 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 0000000000..4e2801459b --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 0000000000..9713d1ef00 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 0000000000..709fa2d9ef --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 0000000000..d50e1cf83f --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 0000000000..a0508fdf0d --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f55..9d5d2d0339 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b5..c5c1c0ee8e 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 21 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++ 6 files changed, 339 insertions(+), 6 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c1273..6fdccc2ab4 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017a..80ff93269c 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff7..8a9b04a3db 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = 
cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784a..bdc5632235 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + 
int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d0339..458fdc8d92 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -86,9 +87,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +119,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +147,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 24bfd985e7..548d7b81ce 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -5,6 +5,8 @@ #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" +#define CNXK_SSO_SQB_LIMIT (0x180) + void cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, uint32_t event_type) @@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + 
uint16_t sqb_limit; + + sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs); + return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, txq); + if (ret < 0) + return ret; + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + } + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, + NULL); + if (ret < 0) + return ret; + } + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
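The Tx adapter bookkeeping above hinges on one flat allocation: dev->tx_adptr_data holds (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT uint64_t slots, grown on demand and indexed two-dimensionally through a cast; cn9k/cn10k_sso_updt_tx_adptr_data() then copy it into the flexible tx_adptr_data[] tail of every worker structure so the fast path reads only worker-local memory. Here is a minimal sketch of just the grow-and-index step, using libc realloc in place of rte_realloc_socket and a small hypothetical DEMO_QUEUES_PER_PORT; demo_* names are illustrative only.

/* demo_txq_table.c - illustrative sketch, not part of the patch. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_QUEUES_PER_PORT 8 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

struct demo_evdev {
	uint64_t *txq_data; /* flat (max_port_id + 1) * DEMO_QUEUES_PER_PORT */
	uint16_t max_port_id;
};

static int
demo_updt_tx_queue_data(struct demo_evdev *dev, uint16_t port, uint16_t queue,
			void *txq)
{
	uint16_t max_port_id = dev->max_port_id;
	uint64_t *txq_data = dev->txq_data;
	size_t old_slots, new_slots;

	if (txq_data == NULL || port > max_port_id) {
		old_slots = txq_data == NULL ?
			0 : (size_t)(max_port_id + 1) * DEMO_QUEUES_PER_PORT;
		if (port > max_port_id)
			max_port_id = port;
		new_slots = (size_t)(max_port_id + 1) * DEMO_QUEUES_PER_PORT;
		txq_data = realloc(txq_data, new_slots * sizeof(uint64_t));
		if (txq_data == NULL)
			return -1;
		/* Zero the grown tail so unregistered queues read as NULL. */
		memset(txq_data + old_slots, 0,
		       (new_slots - old_slots) * sizeof(uint64_t));
	}

	/* Cast the flat buffer to a 2-D array type: the slot is
	 * port * DEMO_QUEUES_PER_PORT + queue, a single load on the
	 * fast path with no per-port indirection.
	 */
	((uint64_t(*)[DEMO_QUEUES_PER_PORT])txq_data)[port][queue] =
		(uint64_t)(uintptr_t)txq;
	dev->max_port_id = max_port_id;
	dev->txq_data = txq_data;
	return 0;
}

int main(void)
{
	struct demo_evdev dev = { NULL, 0 };
	int fake_txq;

	if (demo_updt_tx_queue_data(&dev, 3, 2, &fake_txq))
		return 1;
	/* Fast-path lookup, as the driver's xtract_meta helpers do. */
	uint64_t slot =
		((uint64_t(*)[DEMO_QUEUES_PER_PORT])dev.txq_data)[3][2];
	free(dev.txq_data);
	return slot == (uint64_t)(uintptr_t)&fake_txq ? 0 : 1;
}

The CNXK_SSO_SQB_LIMIT (0x180) clamp applied on queue add, and restored to sq->nb_sqb_bufs on delete, presumably bounds the SQ buffers a hardware-managed Tx queue may consume while it is driven through the adapter; the patch itself does not state the rationale.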
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/meson.build | 6 ++ 11 files changed, 417 insertions(+) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3db..e462f770c5 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083caa..3c90c85009 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ -251,4 +252,70 @@ uint16_t __rte_hot 
cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 0000000000..f9968ac0d0 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..a24fc42e5a --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index bdc5632235..af97020f2f 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads 
& + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1da..5aa053c586 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 0000000000..92e2981f02 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 0000000000..dfb574cf95 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 0000000000..3df649c0c8 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..0efe29113e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8e..13e0634e86 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
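
For context, the Tx fast-path tables registered above are reached through the event eth Tx adapter; below is a minimal application-side sketch using the generic rte_event_eth_tx_adapter API. The adapter/device ids and the fixed Tx queue 0 are illustrative only, and error handling is trimmed:

#include <rte_event_eth_tx_adapter.h>

#define TXA_ID    0 /* hypothetical adapter id */
#define EVDEV_ID  0 /* hypothetical eventdev id */
#define ETHDEV_ID 0 /* hypothetical ethdev id */

static int
txa_setup(struct rte_event_port_conf *port_conf)
{
        int rc;

        rc = rte_event_eth_tx_adapter_create(TXA_ID, EVDEV_ID, port_conf);
        if (rc < 0)
                return rc;
        /* -1 binds all Tx queues of the ethdev to the adapter. */
        rc = rte_event_eth_tx_adapter_queue_add(TXA_ID, ETHDEV_ID, -1);
        if (rc < 0)
                return rc;
        return rte_event_eth_tx_adapter_start(TXA_ID);
}

static void
txa_send(uint8_t ev_port, struct rte_mbuf *m)
{
        struct rte_event ev = {0};

        /* m->port selects the ethdev; txq_set() selects its Tx queue. */
        rte_event_eth_tx_adapter_txq_set(m, 0);
        ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.mbuf = m;
        while (rte_event_eth_tx_adapter_enqueue(EVDEV_ID, ev_port,
                                                &ev, 1, 0) != 1)
                ;
}

Since the cnxk PMD reports RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT, the enqueue above lands directly in the cn9k/cn10k txa_enqueue fast paths selected from the tables in this patch.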
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab4..0297cd3d5f 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c5..e85fa4785d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 458fdc8d92..3783e0c95b 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -96,6 +96,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 548d7b81ce..c4c4f5a7f4 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead03905..2528b3cdaa 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct 
rte_eth_dev *eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
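
The control path above is driven through the rte_event_eth_rx_adapter vector API of this release. A minimal sketch of querying the CN10K limits and enabling vectorization on one Rx queue follows; the ids are illustrative, and the queue is assumed to have been added with RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR set in rx_queue_flags:

#include <errno.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>

static int
rxa_enable_vectors(uint8_t rxa_id, uint8_t evdev_id, uint16_t eth_port,
                   int32_t rxq)
{
        struct rte_event_eth_rx_adapter_event_vector_config cfg;
        struct rte_event_eth_rx_adapter_vector_limits lim;
        struct rte_mempool *vmp;
        int rc;

        rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
                                                        &lim);
        if (rc < 0)
                return rc;

        /* CN10K reports log2_sz, i.e. vector sizes must be powers of two. */
        cfg.vector_sz = lim.max_sz;
        cfg.vector_timeout_ns = lim.min_timeout_ns;
        vmp = rte_event_vector_pool_create("vec_pool", 1024, 0,
                                           cfg.vector_sz, SOCKET_ID_ANY);
        if (vmp == NULL)
                return -ENOMEM;
        cfg.vector_mp = vmp;

        return rte_event_eth_rx_adapter_queue_event_vector_config(
                rxa_id, eth_port, rxq, &cfg);
}

cnxk_sso_rx_adapter_vwqe_enable() above then programs the RQ with the aura of cfg.vector_mp and converts vector_timeout_ns into VWQE wait ticks.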
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269c..11ccc9bcb5 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c85009..7a48a6b17d 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 
abdd58e888..075e1124ed 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f9..166735ad59 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c82..1f44dddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
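
The fast path above hands the application a struct rte_event_vector whose mbufs[] array has already been rearmed. A worker consuming both single-mbuf and vector events looks roughly like the sketch below; process_pkt() is a hypothetical application callback:

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

extern void process_pkt(struct rte_mbuf *m); /* hypothetical */

static void
rx_worker(uint8_t evdev_id, uint8_t ev_port)
{
        struct rte_event ev;
        uint16_t i;

        while (rte_event_dequeue_burst(evdev_id, ev_port, &ev, 1, 0)) {
                if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
                        struct rte_event_vector *vec = ev.vec;

                        for (i = 0; i < vec->nb_elem; i++)
                                process_pkt(vec->mbufs[i]);
                        /* The vector buffer came from the mempool supplied
                         * at vector config time; return it when done.
                         */
                        rte_mempool_put(rte_mempool_from_obj(vec), vec);
                } else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
                        process_pkt(ev.mbuf);
                }
        }
}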
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath, integrate event vector Tx routine into Tx burst. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 1 + doc/guides/rel_notes/release_21_08.rst | 2 +- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++---- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 12 files changed, 167 insertions(+), 53 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 0297cd3d5f..53560d3830 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are: - Full Rx/Tx offload support defined through ethdev queue configuration. - HW managed event vectorization on CN10K for packets enqueued from ethdev to eventdev configurable per each Rx queue in Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 11ccc9bcb5..9e49cb27d7 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,7 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. - * Add support for event vectorization for Rx adapter. + * Add support for event vectorization for Rx/Tx adapter. 
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8a..316c6ccd59 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785d..6f37c5bd23 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17d..9cc0992063 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 5aa053c586..ef1e83741a 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae162..945132b748 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - 
uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); -#else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) - ; -#endif -} #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59a..0e1276c60b 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index b25b20dcb2..e8a99808cc 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1972,6 +1986,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2028,10 +2045,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2040,13 +2058,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f6754722..4ea4c8a4e5 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e3737501..a0350496ab 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dbad..7f98f79b97 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
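
The meta word decoded above is the head of struct rte_event_vector: bits [15:0] carry nb_elem, bit 31 is attr_valid, and bits [47:32]/[63:48] are the common port/queue. A worker forwarding a received vector straight to the Tx adapter can therefore pin a single destination, as in this sketch (ids and destination are hypothetical; if attr_valid is left 0, the PMD instead splits the vector per mbuf via rte_event_eth_tx_adapter_txq_get()):

#include <rte_event_eth_tx_adapter.h>

static void
fwd_vector(uint8_t evdev_id, uint8_t ev_port, struct rte_event *ev)
{
        struct rte_event_vector *vec = ev->vec;

        /* All mbufs share one destination: take the vector Tx routine. */
        vec->attr_valid = 1;
        vec->port = 0;  /* destination ethdev port */
        vec->queue = 0; /* destination Tx queue */

        while (rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port,
                                                ev, 1, 0) != 1)
                ;
}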
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi-segment Rx vector routine: form the primary mbufs using the vector path and switch to the scalar path when extracting segments. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394 v3 Changes: - Spell check. drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ drivers/net/cnxk/meson.build | 2 ++ 7 files changed, 157 insertions(+), 44 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 5c956c06b4..3a9fd71309 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the * PTP apps are implemented to rx burst 1 pkt.
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 1cc37cbaa0..5926ff7f46 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c new file mode 100644 index 0000000000..04d1e46c82 --- /dev/null +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index 0acedd0a1f..d293d4eac3 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 10ef5c6905..5ae9e8195c 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c new file mode 100644 index 0000000000..e46d8a4749 --- /dev/null +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | \ + NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index 2071d0dcb2..aa8c7253fb 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx.c', 'cn9k_rx_mseg.c', 'cn9k_rx_vec.c', + 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', 'cn9k_tx_vec.c') @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx.c', 'cn10k_rx_mseg.c', 'cn10k_rx_vec.c', + 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', 'cn10k_tx_vec.c') -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in vector Rx burst function. Use the vector path for processing mbufs and switch to scalar only when extracting the timestamp. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_ethdev.c | 1 - drivers/net/cnxk/cn10k_rx.c | 5 +- drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_rx_vec.c | 3 - drivers/net/cnxk/cn9k_ethdev.c | 1 - drivers/net/cnxk/cn9k_rx.c | 5 +- drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_rx_vec.c | 3 - drivers/net/cnxk/cnxk_ethdev.h | 19 ++--- 9 files changed, 232 insertions(+), 53 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index b079edbd35..7caec6cf14 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 3a9fd71309..69e767ac3d 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 5926ff7f46..d9572b19e7 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 65ffa97841..93528a44f9 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -11,9 +11,6 @@ struct rte_mbuf **rx_pkts, \ uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 107a540915..cb302b75d8 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index d293d4eac3..7d9f1bd61f 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 5ae9e8195c..beb52f39d5 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c index e61c2225c6..ef5f771ef7 100644 --- a/drivers/net/cnxk/cn9k_rx_vec.c +++ b/drivers/net/cnxk/cn9k_rx_vec.c @@ -9,9 +9,6 @@ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 67b1f42531..4eead03905 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -136,13 +136,12 @@ struct cnxk_eth_qconf { }; struct cnxk_timesync_info { + uint8_t rx_ready; + uint64_t rx_tstamp; uint64_t rx_tstamp_dynflag; + int tstamp_dynfield_offset; rte_iova_t tx_tstamp_iova; uint64_t *tx_tstamp; - uint64_t rx_tstamp; - int tstamp_dynfield_offset; - uint8_t tx_ready; - uint8_t rx_ready; } __plt_cache_aligned; struct cnxk_eth_dev { @@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf, static __rte_always_inline void cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, - struct cnxk_timesync_info *tstamp, bool ts_enable, + struct cnxk_timesync_info *tstamp, + const uint8_t ts_enable, const uint8_t mseg_enable, uint64_t *tstamp_ptr) { - if (ts_enable && - (mbuf->data_off == - RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) { - mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + if (ts_enable) { + if (!mseg_enable) { + mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + } /* Reading the rx timestamp inserted by CGX, viz at * starting of the packet data. -- 2.17.1
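The NEON timestamp handling above ends by collapsing a per-lane compare into a scalar bitmap: vceqq_u32() matches each packet's ptype against RTE_PTYPE_L2_ETHER_TIMESYNC, vandq_u32() with {0x1, 0x2, 0x4, 0x8} tags each matching lane with its bit, vaddvq_u32() sums the lanes into res, and 31 - __builtin_clz(res) then indexes the newest PTP packet of the four. A scalar model of that reduction, with made-up ptype and timestamp values (the 0x100 "timesync" constant is a stand-in, not the real RTE_PTYPE value):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Stand-in ptypes for 4 received packets. */
	const uint32_t ptype[4] = {0x100, 0x6, 0x100, 0x2};
	const uint32_t timesync = 0x100;
	const uint64_t ts[4] = {1000, 2000, 3000, 4000};
	uint8_t res = 0;
	int i;

	/* Models vceqq_u32 + vandq_u32({1, 2, 4, 8}) + vaddvq_u32. */
	for (i = 0; i < 4; i++)
		res |= (ptype[i] == timesync) << i;

	if (res) {
		/* Highest set bit == latest PTP packet in the group. */
		int last = 31 - __builtin_clz(res);

		printf("latest PTP ts = %llu\n",
		       (unsigned long long)ts[last]); /* 3000 here */
	}
	return 0;
}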
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable VLAN offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 3 +- drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_tx_vec.c | 3 +- drivers/net/cnxk/cn9k_tx.c | 3 +- drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 3 +- 6 files changed, 227 insertions(+), 38 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 18694dc704..05bc163a40 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8b1446f25c..1e16978584 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_pkts_per_vec_brst(const uint16_t flags) { - RTE_SET_USED(flags); - /* We can pack up to 4 packets per LMTLINE if there are no offloads. */ - return 4 << ROC_LMT_LINES_PER_CORE_LOG2; + return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4) + << ROC_LMT_LINES_PER_CORE_LOG2; +} + +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line(const uint16_t flags) +{ + return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; } static __rte_always_inline uint64_t @@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags) static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { - const uint64_t dw_m1 = 0x7; + const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1; uint64_t data; - RTE_SET_USED(flags); /* This will be moved to addr area */ data = dw_m1; /* 15 vector sizes for single seg */ @@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0); sgdesc23_w0 = sgdesc01_w0; + /* Load command defaults into vector variables. 
*/ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + } + /* Get LMT base address and LMT ID as lcore id */ ROC_LMT_BASE_ID_GET(laddr, lmt_id); left = pkts; @@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); - lnum += 1; + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + lnum += 1; + } else { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); + lnum += 1; + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 7453f3bc98..beb5c649bb 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index b802606075..4b43cdaff9 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 1899d6670f..d5715bb52d 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -552,10 +552,13 @@ 
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; senddesc01_w1 = vdupq_n_u64(0); senddesc23_w1 = senddesc01_w1; - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); - sgdesc23_w0 = sgdesc01_w0; + + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); + sgdesc23_w0 = sgdesc01_w0; + } else { + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sgdesc23_w0 = sgdesc01_w0; + } for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) { /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ @@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd1[0]); - vst1q_u64(lmt_addr + 4, cmd0[1]); - vst1q_u64(lmt_addr + 6, cmd1[1]); - vst1q_u64(lmt_addr + 8, cmd0[2]); - vst1q_u64(lmt_addr + 10, cmd1[2]); - vst1q_u64(lmt_addr + 12, cmd0[3]); - vst1q_u64(lmt_addr + 14, cmd1[3]); - lmt_status = roc_lmt_submit_ldeor(io_addr); - } while (lmt_status == 0); + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* With ext header in the command we can no longer send + * all 4 packets together since LMTLINE is 128bytes. + * Split and Tx twice. + */ + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + do { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } else { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index a6e7c9e542..5842facb58 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ -- 2.17.1
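As read from the shifts and masks in this patch, SEND_EXT W1 carries the outer (VLAN0) TCI at bits 8..23 with its insert-enable at bit 48, the inner (VLAN1) TCI at bits 32..47 with its enable at bit 49, and the vdupq_n_u64(12 | 12U << 24) default seeds the two insert pointers. A scalar sketch of that packing; the flag values are stand-ins for PKT_TX_QINQ/PKT_TX_VLAN and the layout is inferred from the diff, not an authoritative hardware description:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(x) (1ULL << (x))
/* Stand-ins for the PKT_TX_VLAN / PKT_TX_QINQ ol_flag bits. */
#define TX_VLAN BIT_ULL(0)
#define TX_QINQ BIT_ULL(1)

static uint64_t
ext_w1(uint64_t ol_flags, uint16_t tci_outer, uint16_t tci_inner)
{
	uint64_t w1 = 12 | 12U << 24; /* default VLAN0/VLAN1 insert pointers */

	w1 |= (uint64_t)tci_outer << 8;	 /* VLAN0 (outer) TCI, bits 8..23 */
	w1 |= (uint64_t)tci_inner << 32; /* VLAN1 (inner) TCI, bits 32..47 */
	if (ol_flags & TX_QINQ)
		w1 |= BIT_ULL(48); /* enable VLAN0 insertion */
	if (ol_flags & TX_VLAN)
		w1 |= BIT_ULL(49); /* enable VLAN1 insertion */
	return w1;
}

int main(void)
{
	printf("SEND_EXT W1 = 0x%016llx\n",
	       (unsigned long long)ext_w1(TX_VLAN | TX_QINQ, 0x123, 0x456));
	return 0;
}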
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in vector Tx burst function. Since we can no longer use a single LMT line for a burst of 4, split the LMT into two and transmit twice. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 4 +- drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++----- drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 4 +- drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 192 insertions(+), 40 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 05bc163a40..c4c3e65704 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 1e16978584..8af6799ff6 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_tx_dwords_per_line(const uint16_t flags) { - return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; + return (flags & NIX_TX_NEED_EXT_HDR) ? + ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : + 8; } static __rte_always_inline uint64_t @@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = sendext01_w0; sendext01_w1 = vdupq_n_u64(12 | 12U << 24); sendext23_w1 = sendext01_w1; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]); + sendmem23_w1 = sendmem01_w1; + } } /* Get LMT base address and LMT ID as lcore id */ @@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB. */ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default.
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted + * mask as we need not make any changes to default cmd + * value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); - lnum += 1; - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]); + } else { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + } lnum += 1; } else { /* Store the prepared send desc to LMT lines */ diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index beb5c649bb..0b4a4c7bae 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 4b43cdaff9..c32681ed44 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index d5715bb52d..cb574a1c1d 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); sgdesc23_w0 = sgdesc01_w0; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]); + sendmem23_w1 = sendmem01_w1; + } } else { sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); sgdesc23_w0 = sgdesc01_w0; @@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB. */ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default.
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted + * mask as we need not make any changes to default cmd + * value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * Split and Tx twice.
*/ do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd2[0]); - vst1q_u64(lmt_addr + 4, cmd1[0]); - vst1q_u64(lmt_addr + 6, cmd0[1]); - vst1q_u64(lmt_addr + 8, cmd2[1]); - vst1q_u64(lmt_addr + 10, cmd1[1]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd3[0]); + vst1q_u64(lmt_addr + 8, cmd0[1]); + vst1q_u64(lmt_addr + 10, cmd2[1]); + vst1q_u64(lmt_addr + 12, cmd1[1]); + vst1q_u64(lmt_addr + 14, cmd3[1]); + } else { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); do { - vst1q_u64(lmt_addr, cmd0[2]); - vst1q_u64(lmt_addr + 2, cmd2[2]); - vst1q_u64(lmt_addr + 4, cmd1[2]); - vst1q_u64(lmt_addr + 6, cmd0[3]); - vst1q_u64(lmt_addr + 8, cmd2[3]); - vst1q_u64(lmt_addr + 10, cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd3[2]); + vst1q_u64(lmt_addr + 8, cmd0[3]); + vst1q_u64(lmt_addr + 10, cmd2[3]); + vst1q_u64(lmt_addr + 12, cmd1[3]); + vst1q_u64(lmt_addr + 14, cmd3[3]); + } else { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); } else { diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 5842facb58..9ade66db2b 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
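The inverted-mask trick in this patch is worth spelling out: the SEND_MEM words default to a SETTSTMP command aimed at the Tx timestamp address, and vmvnq_u32(vtstq_u64(olf, ...)) yields an all-ones lane exactly when PKT_TX_IEEE1588_TMST is absent, which then flips the alg to SUB (bit 59) and adds 8 to the address. A scalar model follows; the reading that the +8 write is redirected to a harmless scratch dword is an assumption inferred from the diff, not stated in the patch, and the flag value is a stand-in:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(x) (1ULL << (x))
/* Stand-in for the PKT_TX_IEEE1588_TMST ol_flag bit. */
#define TX_TMST BIT_ULL(0)

static void
fixup_send_mem(uint64_t ol_flags, uint64_t *w0, uint64_t *w1)
{
	/* Models vmvnq_u32(vtstq_u64(olf, ...)): all-ones iff no TMST. */
	uint64_t no_ts = (ol_flags & TX_TMST) ? 0 : ~0ULL;

	*w0 |= no_ts & BIT_ULL(59); /* alg SETTSTMP -> SUB when disabled */
	*w1 += no_ts & 0x8;	    /* redirect the write by one dword */
}

int main(void)
{
	uint64_t w0 = 0, w1 = 0x1000; /* illustrative SEND_MEM W0/W1 */

	fixup_send_mem(0, &w0, &w1); /* packet without TMST requested */
	printf("w0=0x%llx w1=0x%llx\n",
	       (unsigned long long)w0, (unsigned long long)w1);
	return 0;
}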
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable TSO offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 2 +- drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index c4c3e65704..d06879163f 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8af6799ff6..26797581e7 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + const uint64_t flags, const uint64_t lso_tun_fmt) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + uint8_t shift = is_udp_tun ? 32 : 0; + + shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4); + shift += (!!(ol_flags & PKT_TX_IPV6) << 3); + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + + w0->lso_format = (lso_tun_fmt >> shift); + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0); senddesc23_w0 = senddesc01_w0; @@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. 
*/ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + const uint64_t lso_fmt = txq->lso_tun_fmt; + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. */ + cn10k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags, lso_fmt); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 0b4a4c7bae..34e3737501 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index c32681ed44..735e21cc60 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index cb574a1c1d..dca732a9fa 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + uint64_t flags) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + 
w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + w0->lso_format += is_udp_tun ? 2 : 6; + + w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1; + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } + /* Lets commit any changes in the packet here as no further changes * to the packet will be done unless no fast free is enabled. */ @@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. */ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. 
*/ + cn9k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags); + + cn9k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 9ade66db2b..56a3e2514a 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
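cn9k/cn10k_nix_prepare_tso() above picks the LSO start byte without a branch: mask = -(!w1->il3type) is all-ones for plain TSO (no inner L3) and zero for tunneled TSO, so (mask & ol4ptr) + (~mask & il4ptr) selects the right L4 pointer. A standalone sketch of that branchless select, with invented header offsets:

#include <assert.h>
#include <stdint.h>

static uint16_t
lso_start(uint16_t ol4ptr, uint16_t il4ptr, uint16_t il3type, uint16_t l4_len)
{
	/* -(cond): all-ones when there is no inner L3 (plain TSO). */
	uint64_t mask = -(uint64_t)(!il3type);

	return (uint16_t)((mask & ol4ptr) + (~mask & il4ptr) + l4_len);
}

int main(void)
{
	assert(lso_start(34, 0, 0, 20) == 54);	 /* plain TSO: outer L4 + hdr */
	assert(lso_start(34, 84, 1, 20) == 104); /* tunnel TSO: inner L4 + hdr */
	return 0;
}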
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi segment Tx vector routine. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 20 +- drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++-- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/cn9k_tx.c | 20 +- drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++- drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/meson.build | 6 +- 7 files changed, 709 insertions(+), 45 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index d06879163f..1f30bab59a 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 26797581e7..532b53b319 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -42,6 +42,13 @@ } \ } while (0) +/* Encoded number of segments to number of dwords macro, each value of nb_segs + * is encoded as 4bits. + */ +#define NIX_SEGDW_MAGIC 0x76654432210ULL + +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) + #define LMT_OFF(lmt_addr, lmt_num, offset) \ (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags) return data; } +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags) +{ + return ((flags & NIX_TX_NEED_EXT_HDR) ? + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 
8 : 6 : + 4); +} + static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; +} + +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint8_t segdw, + const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return; + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + sh.sizem1 = segdw - 1; + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline uint8_t +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, + uint64x2_t *cmd1, uint64x2_t *cmd2, + uint64x2_t *cmd3, uint8_t *segdw, + uint64_t *lmt_addr, __uint128_t *data128, + uint8_t *shift, const uint16_t flags) +{ + uint8_t j, off, lmt_used; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. 
*/ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + + *data128 |= ((__uint128_t)7) << *shift; + *shift += 3; + + return 1; + } + } + + lmt_used = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. */ + if ((segdw[j] + segdw[j + 1]) <= 8) { + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + /* TSTAMP takes 4 each, no segs. */ + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + vst1q_u64(lmt_addr + 6, cmd3[j]); + + vst1q_u64(lmt_addr + 8, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12, cmd1[j + 1]); + vst1q_u64(lmt_addr + 14, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + /* EXT header takes 3 each, space for 2 segs. */ + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 3; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 12 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + off = segdw[j] - 2; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 8 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + } + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1) + << *shift; + *shift += 3; + j += 2; + } else { + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 4; + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + } + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift; + *shift += 3; + j++; + } + lmt_used++; + lmt_addr += 16; + } + + return lmt_used; +} + static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue,
struct rte_mbuf **tx_pkts, uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sendmem01_w0, sendmem23_w0; uint64x2_t sendmem01_w1, sendmem23_w1; + uint8_t segdw[NIX_DESCS_PER_LOOP + 1]; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t ltypes01, ltypes23; uint64x2_t xtmp128, ytmp128; uint64x2_t xmask01, xmask23; - uint8_t lnum; + uint8_t lnum, shift; + union wdata { + __uint128_t data128; + uint64_t data[2]; + } wd; NIX_XMIT_FC_OR_RETURN(txq, pkts); @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, burst = left > cn10k_nix_pkts_per_vec_brst(flags) ? cn10k_nix_pkts_per_vec_brst(flags) : left; + if (flags & NIX_TX_MULTI_SEG_F) { + wd.data128 = 0; + shift = 16; + } lnum = 0; + for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + struct rte_mbuf *m = tx_pkts[j]; + + /* Get dwords based on nb_segs. */ + segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs); + /* Add dwords based on offloads. */ + segdw[j] += 1 + /* SEND HDR */ + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + /* Check if there are enough LMTLINES for this loop */ + if (lnum + 4 > 32) { + uint8_t ldwords_con = 0, lneeded = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + ldwords_con += segdw[j]; + if (ldwords_con > 8) { + lneeded += 1; + ldwords_con = segdw[j]; + } + } + lneeded += 1; + if (lnum + lneeded > 32) { + burst = i; + break; + } + } + } /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ senddesc01_w0 = vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF)); @@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, (void **)&mbuf3, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + segdw[4] = 8; + j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1, + cmd2, cmd3, segdw, + (uint64_t *) + LMT_OFF(laddr, lnum, + 0), + &wd.data128, &shift, + flags); + lnum += j; + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { vst1q_u64(LMT_OFF(laddr,
lnum, 0), cmd0[0]); @@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] >>= 16; + /* Trigger LMTST */ if (lnum > 16) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= (15ULL << 12); - data |= (uint64_t)lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= (15ULL << 12); + wd.data[0] |= (uint64_t)lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 17)) << 12; - data |= (uint64_t)(lmt_id + 16); + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[1] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[1] & 0x7) << 4; + wd.data[1] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[1] <<= 16; + + wd.data[1] |= ((uint64_t)(lnum - 17)) << 12; + wd.data[1] |= (uint64_t)(lmt_id + 16); /* STEOR1 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[1], pa); } else if (lnum) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 1)) << 12; - data |= lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= ((uint64_t)(lnum - 1)) << 12; + wd.data[0] |= lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); } left -= burst; @@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (left) goto again; - if (unlikely(scalar)) - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd, - flags); + if (unlikely(scalar)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + scalar, cmd, flags); + else + pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, + cmd, flags); + } return pkts; } @@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \ - void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ NIX_TX_FASTPATH_MODES #undef T diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c new file mode 100644 index 0000000000..1fad81dbad --- /dev/null +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn10k_nix_xmit_pkts_vector( \ + tx_queue, tx_pkts, pkts, cmd, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 735e21cc60..763f9a14fd 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index dca732a9fa..ed65cd351f 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + uint64_t segdw; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; + segdw = (uint64_t *)slist - (uint64_t *)&cmd[0]; + + 
segdw += 2; + /* Roundup extra dwords to multiple of 2 */ + segdw = (segdw >> 1) + (segdw & 0x1); + /* Default dwords */ + segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + sh->sizem1 = segdw - 1; + + return segdw; +} + +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + uint8_t ret; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + return ret; +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline void +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1, + uint64x2_t *cmd2, uint64x2_t *cmd3, + uint8_t *segdw, + uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2], + uint64_t *lmt_addr, rte_iova_t io_addr, + const uint32_t flags) +{ + uint64_t lmt_status; + uint8_t j, off; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. */ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + return; + } + } + + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. 
*/ + if ((segdw[j] + segdw[j + 1]) <= 8) { +again0: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + + vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 14 + off, + slist[j + 1], segdw[j + 1] - 4); + off += ((segdw[j + 1] - 4) << 1); + vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 12 + off, + slist[j + 1], segdw[j + 1] - 3); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 8 + off, + slist[j + 1], segdw[j + 1] - 2); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again0; + j += 2; + } else { +again1: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again1; + j += 1; + } + } +} + static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * cnxk_nix_prefree_seg are written before LMTST. 
*/ rte_io_wmb(); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint64_t seg_list[NIX_DESCS_PER_LOOP] + [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; + uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; + + /* Build mseg list for each packet individually. */ + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j], + seg_list[j], &cmd0[j], + &cmd1[j], flags); + segdw[4] = 8; + + /* Commit all changes to mbuf before LMTST. */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + rte_io_wmb(); + + cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, + segdw, seg_list, + lmt_addr, io_addr, + flags); + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* With ext header in the command we can no longer send * all 4 packets together since LMTLINE is 128bytes. * Split and Tx twice. @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } - if (unlikely(pkts_left)) - pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, - flags); + if (unlikely(pkts_left)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + pkts_left, cmd, flags); + else + pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, + cmd, flags); + } return pkts; } @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c new file mode 100644 index 0000000000..0256efd45a --- /dev/null +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ + (flags) | \ + NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index aa8c7253fb..361f7ce849 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', - 'cn9k_tx_vec.c') + 'cn9k_tx_vec.c', + 'cn9k_tx_vec_mseg.c') # CN10K sources += files('cn10k_ethdev.c', 'cn10k_rte_flow.c', @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', - 'cn10k_tx_vec.c') + 'cn10k_tx_vec.c', + 'cn10k_tx_vec_mseg.c') deps += ['bus_pci', 'cryptodev', 'eventdev', 'security'] deps += ['common_cnxk', 'mempool_cnxk'] -- 2.17.1
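A note on NIX_SEGDW_MAGIC introduced in cn10k_tx.h above: it is a packed nibble table indexed by nb_segs, giving the number of 16B dword units occupied by the SG area. The encoding follows from the list builder in the same patch: each SG subdescriptor carries one 64-bit header word plus up to three segment pointers (hence the `i > 2` reset in cn10k_nix_prepare_mseg_vec_list), rounded up to 2-word units. A small self-checking sketch of that derivation, using only the macro from the patch:

#include <stdio.h>

#define NIX_SEGDW_MAGIC 0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int
main(void)
{
	unsigned int n;

	for (n = 1; n <= 10; n++) {
		/* One pointer word per segment plus one SG header word per
		 * group of three segments, rounded up to 16B (2-word) units.
		 */
		unsigned int words = n + (n + 2) / 3;
		unsigned int expect = (words + 1) / 2;

		printf("nb_segs=%2u segdw=%llu expect=%u\n", n,
		       (unsigned long long)NIX_NB_SEGS_TO_SEGDW(n), expect);
	}
	return 0;
}

The two columns agree for every nb_segs value the table encodes (1 through 10), which is exactly the computation NIX_NB_SEGS_TO_SEGDW() replaces with a single shift and mask on the hot path.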
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize the cn10k workslot fastpath structure to fit in a 64B cacheline. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 28 ++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 540 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800cc..b7e82c1273 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Full Rx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +97,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across the ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +173,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to the +event device causes back pressure to be asserted only on the first +ethernet device. +Back pressure is therefore automatically disabled when the same mempool is +used across all the ethernet devices connected to the event device; +applications can override this with the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device is recommended when multiple +devices are connected to the event device. diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 31e49e1a56..3892c8017a 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,6 +60,11 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. +* **Added support for Marvell CN9K and CN10K event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk.
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb69027956..76613fe84e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f8..f17eba4169 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df442..9dc0c88a6f 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e57..cb1ce4b6fc 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76c..2060c8fe84 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } 
+static int +cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c64..5dbae275ba 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd5..c7250bf9e7 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + 
SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417eab..072800c243 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb6..f5a4401465 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ 
b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a79..cfd7fb971c 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62a..b65d725f55 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct 
rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c14..24bfd985e7 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, + RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, i, port, &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); 
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, (uint16_t)rx_queue_id, port, + &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a9..eda562f5b5 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
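A note on the tag composition in cnxk_sso_rxq_enable() above: the event type goes in the top nibble of the 32-bit SSO tag, the 8-bit ethdev port id sits below it, and flow_tag_width = 20 leaves the low 20 bits to the RSS hash (unless the adapter overrides them with a custom flow id). Below is a small sketch of that packing; RTE_EVENT_TYPE_ETHDEV is 0x0 in rte_eventdev.h, and the port value is an arbitrary example:

#include <stdint.h>
#include <stdio.h>

#define RTE_EVENT_TYPE_ETHDEV 0x0 /* from rte_eventdev.h */

static uint32_t
rq_tag_mask(uint16_t port_id)
{
	uint32_t tag_mask;

	/* Low nibble of the port id lands in tag bits [23:20]... */
	tag_mask = (uint32_t)(port_id & 0xF) << 20;
	/* ...high nibble in [27:24], event type in [31:28]. */
	tag_mask |= (uint32_t)(((port_id >> 4) & 0xF) |
			       (RTE_EVENT_TYPE_ETHDEV << 4))
		    << 24;
	return tag_mask;
}

int
main(void)
{
	/* Port 0x2a: expect 0x02a00000; low 20 bits stay free for RSS. */
	printf("tag_mask = 0x%08x\n", (unsigned int)rq_tag_mask(0x2a));
	return 0;
}

Packing these fields into the tag is what lets the dequeue side recover the event type and port id directly from the getwork result when converting a WQE into an rte_event, without an extra memory lookup.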
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe84..ba7d95fff7 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275ba..c71aa37327 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e7..b724083caa 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 0000000000..36ec454ccc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 0000000000..29ecc551cf --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 0000000000..c8524a27bd --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c243..e386cb784a 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98dd..538bc4b0b3 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a4401465..c01c00e1da 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 0000000000..51ccaf4ec4 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 0000000000..4e2801459b --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 0000000000..9713d1ef00 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 0000000000..709fa2d9ef --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 0000000000..d50e1cf83f --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 0000000000..a0508fdf0d --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f55..9d5d2d0339 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b5..c5c1c0ee8e 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
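A note on the pattern used throughout this patch: NIX_RX_FASTPATH_MODES is an X-macro that stamps out one dequeue specialization per combination of the six Rx offload flags (f5..f0 map to VLAN_STRIP, TSTAMP, MARK_UPDATE, CHECKSUM, PTYPE and RSS, as the indexing in cn*_sso_fp_fns_set() shows), and the right specialization is selected once at configure time by indexing a [2][2][2][2][2][2] table with !!(dev->rx_offloads & FLAG). Since 'flags' is a compile-time constant inside each generated body, the per-event path carries no offload branches. The 'sub %[mbuf], %[wqp], #0x80' in the get_work sequences is the data-side counterpart of the same idea: the mbuf sits exactly sizeof(struct rte_mbuf) (128 bytes) before the WQE, so its address is recovered with a single subtraction. Below is a self-contained analogue of the R()/table dispatch with two hypothetical flags; it is a sketch of the technique, not driver code.

#include <stdint.h>
#include <stdio.h>

#define OFF_RSS_F  (1 << 0)
#define OFF_CSUM_F (1 << 1)

/* M(name, f1, f0, flags): f1 = checksum, f0 = RSS. */
#define MODES                                                          \
M(no_offload, 0, 0, 0)                                                 \
M(rss,        0, 1, OFF_RSS_F)                                         \
M(csum,       1, 0, OFF_CSUM_F)                                        \
M(csum_rss,   1, 1, OFF_CSUM_F | OFF_RSS_F)

/* Stamp out one specialization per flag combination; 'flags' is a
 * compile-time constant in each body, so offload branches fold away.
 */
#define M(name, f1, f0, flags)                                         \
static uint16_t rx_burst_##name(void)                                  \
{                                                                      \
	return (flags);                                                \
}
MODES
#undef M

typedef uint16_t (*rx_burst_t)(void);

int
main(void)
{
	/* Same shape as the sso_hws_deq[2]...[2] tables above. */
	const rx_burst_t tbl[2][2] = {
#define M(name, f1, f0, flags) [f1][f0] = rx_burst_##name,
		MODES
#undef M
	};
	uint32_t dev_flags = OFF_CSUM_F | OFF_RSS_F; /* runtime config */
	rx_burst_t fn =
		tbl[!!(dev_flags & OFF_CSUM_F)][!!(dev_flags & OFF_RSS_F)];

	printf("selected specialization flags: %u\n", (unsigned int)fn());
	return 0;
}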
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 21 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++ 6 files changed, 339 insertions(+), 6 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c1273..6fdccc2ab4 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017a..80ff93269c 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff7..8a9b04a3db 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = 
cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784a..bdc5632235 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ +
int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d0339..458fdc8d92 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -86,9 +87,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +119,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +147,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 24bfd985e7..548d7b81ce 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -5,6 +5,8 @@ #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" +#define CNXK_SSO_SQB_LIMIT (0x180) + void cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, uint32_t event_type) @@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + 
uint16_t sqb_limit; + + sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs); + return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, txq); + if (ret < 0) + return ret; + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + } + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, + NULL); + if (ret < 0) + return ret; + } + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
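The control path above keeps dev->tx_adptr_data as a single flat uint64_t array of (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT slots and indexes it by casting to a pointer-to-row type, so the worker fast path can turn an (eth port, Tx queue) pair into a txq pointer with one load; the per-SoC updt_tx_adptr_data() helpers then append a private copy of the table to each HWS. A minimal standalone sketch of the same layout, using plain libc realloc() and invented demo_* names in place of the driver's helpers:

#include <stdint.h>
#include <stdlib.h>

#define DEMO_MAX_QUEUES_PER_PORT 64 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

struct demo_txq_table {
	uint64_t *data;       /* (max_port_id + 1) * DEMO_MAX_QUEUES_PER_PORT slots */
	uint16_t max_port_id;
};

/* Grow the flat table on demand and store a txq pointer at [port][queue]. */
static int
demo_txq_table_set(struct demo_txq_table *t, uint16_t port, uint16_t queue,
		   void *txq)
{
	if (t->data == NULL || port > t->max_port_id) {
		uint16_t max = port > t->max_port_id ? port : t->max_port_id;
		uint64_t *p = realloc(t->data, sizeof(uint64_t) * (max + 1) *
						       DEMO_MAX_QUEUES_PER_PORT);

		if (p == NULL)
			return -1;
		t->data = p;
		t->max_port_id = max;
	}
	/* Same cast trick as the driver: view the flat array as [port][queue]. */
	((uint64_t(*)[DEMO_MAX_QUEUES_PER_PORT])t->data)[port][queue] =
		(uint64_t)(uintptr_t)txq;
	return 0;
}

/* One load on the fast path: row = port, column = queue. */
static void *
demo_txq_table_get(const struct demo_txq_table *t, uint16_t port,
		   uint16_t queue)
{
	const uint64_t (*rows)[DEMO_MAX_QUEUES_PER_PORT] =
		(const uint64_t (*)[DEMO_MAX_QUEUES_PER_PORT])t->data;

	return (void *)(uintptr_t)rows[port][queue];
}

As in the driver, realloc() leaves newly grown rows uninitialized, so a slot is only meaningful after a set; the adapter guarantees this by adding every queue through the control path before the fast path may look it up.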
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/meson.build | 6 ++ 11 files changed, 417 insertions(+) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3db..e462f770c5 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083caa..3c90c85009 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ -251,4 +252,70 @@ uint16_t __rte_hot 
cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 0000000000..f9968ac0d0 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..a24fc42e5a --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index bdc5632235..af97020f2f 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads 
& + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1da..5aa053c586 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 0000000000..92e2981f02 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 0000000000..dfb574cf95 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 0000000000..3df649c0c8 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..0efe29113e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8e..13e0634e86 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
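Each T() entry in NIX_TX_FASTPATH_MODES expands to one specialized enqueue routine, and the [2][2][2][2][2][2] tables above pick among the 64 specializations once at configure time, collapsing each offload flag to 0/1 with the double negation. A reduced two-flag sketch of the selection scheme (demo_* names are invented for illustration):

#include <stdint.h>

#define DEMO_OFFLOAD_CSUM   (1U << 0)
#define DEMO_OFFLOAD_TSTAMP (1U << 1)

typedef uint16_t (*demo_enq_fn)(void);

/* One specialization per flag combination, as the T() macro would emit. */
static uint16_t demo_enq(void)             { return 0; }
static uint16_t demo_enq_csum(void)        { return 1; }
static uint16_t demo_enq_tstamp(void)      { return 2; }
static uint16_t demo_enq_tstamp_csum(void) { return 3; }

static demo_enq_fn
demo_select(uint32_t offloads)
{
	/* Axes ordered [TSTAMP][CSUM]; !! collapses each flag to 0 or 1. */
	static const demo_enq_fn tbl[2][2] = {
		{ demo_enq, demo_enq_csum },
		{ demo_enq_tstamp, demo_enq_tstamp_csum },
	};

	return tbl[!!(offloads & DEMO_OFFLOAD_TSTAMP)]
		  [!!(offloads & DEMO_OFFLOAD_CSUM)];
}

The payoff is that every per-packet branch on an offload flag becomes a compile-time constant inside the chosen specialization, at the cost of instantiating code for the full combination table.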
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab4..0297cd3d5f 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c5..e85fa4785d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 458fdc8d92..3783e0c95b 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -96,6 +96,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 548d7b81ce..c4c4f5a7f4 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead03905..2528b3cdaa 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct 
rte_eth_dev *eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
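The limits reported above encode the hardware aggregation-timer granularity: one tick is (vwqe_interval + 1) * 100 ns, and the wait-time field spans BITMASK_ULL(8, 0) ticks, i.e. a 9-bit count assuming the usual bitmask-macro semantics; cnxk_sso_rx_adapter_vwqe_enable() quantizes the requested vector_timeout_ns by the same tick when it programs rq->vwqe_wait_tmo. A hedged sketch of that conversion (names are illustrative, not the driver's):

#include <stdint.h>

/* Convert a nanosecond timeout to VWQE wait-timer ticks, clamped to the
 * 9-bit field implied by min/max_timeout_ns above (0x1FF assumed equal
 * to BITMASK_ULL(8, 0)).
 */
static uint64_t
demo_vwqe_tmo_ticks(uint64_t tmo_ns, uint32_t vwqe_interval)
{
	const uint64_t tick_ns = ((uint64_t)vwqe_interval + 1) * 100;
	uint64_t ticks = tmo_ns / tick_ns;

	if (ticks < 1)
		ticks = 1;
	if (ticks > 0x1FF)
		ticks = 0x1FF;
	return ticks;
}

The adapter layer already validates the timeout against the advertised limits, so the clamp here is belt-and-braces for a standalone sketch.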
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269c..11ccc9bcb5 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c85009..7a48a6b17d 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 
d9572b19e7..a506a867ca 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f9..166735ad59 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c82..1f44dddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
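In the get_work hunk above, the first 64-bit word of the hardware VWQE is rewritten in place into an rte_event_vector header: the 12-bit element count from the second word moves to the low bits, bit 31 flags the port/queue attributes as valid, the ethdev port lands at bit 32 and the original tag bits at bit 48. The Tx side (next patch) consumes exactly this word. A sketch of packing and unpacking it, assuming the 21.08 rte_event_vector layout (nb_elem in bits 15:0, attr_valid at bit 31, port in bits 47:32, queue in bits 63:48):

#include <stdint.h>

static uint64_t
demo_vec_meta_pack(uint16_t nb_elem, uint16_t port, uint16_t queue)
{
	return (uint64_t)nb_elem |
	       (1ULL << 31) |              /* attr_valid */
	       ((uint64_t)port << 32) |
	       ((uint64_t)queue << 48);
}

static void
demo_vec_meta_unpack(uint64_t meta, uint16_t *nb_elem, uint16_t *port,
		     uint16_t *queue)
{
	*nb_elem = (uint16_t)meta;         /* meta & 0xFFFF */
	*port = (uint16_t)(meta >> 32);    /* 16-bit truncation drops queue bits */
	*queue = (uint16_t)(meta >> 48);
}

Note the truncation on the port extraction: a plain meta >> 32 still carries the queue bits in its upper half, which is why a 16-bit mask (or cast) matters wherever the word is used as an array index.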
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath, integrate event vector Tx routine into Tx burst. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 1 + doc/guides/rel_notes/release_21_08.rst | 2 +- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++---- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 12 files changed, 167 insertions(+), 53 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 0297cd3d5f..53560d3830 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are: - Full Rx/Tx offload support defined through ethdev queue configuration. - HW managed event vectorization on CN10K for packets enqueued from ethdev to eventdev configurable per each Rx queue in Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 11ccc9bcb5..9e49cb27d7 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,7 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. - * Add support for event vectorization for Rx adapter. + * Add support for event vectorization for Rx/Tx adapter. 
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8a..316c6ccd59 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785d..6f37c5bd23 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17d..9cc0992063 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 5aa053c586..ef1e83741a 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae162..945132b748 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - 
uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); -#else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) - ; -#endif -} #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59a..0e1276c60b 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 532b53b319..d2a24120ef 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1972,6 +1986,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2028,10 +2045,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2040,13 +2058,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f6754722..4ea4c8a4e5 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e3737501..a0350496ab 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dbad..7f98f79b97 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
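[Editorial sketch] The VWQE changes above follow the driver's usual const-flag specialization: `flags` is a compile-time constant at every macro expansion site, so the new branches vanish from the regular Tx path, while event-originated (VWQE) bursts skip the software flow-control accounting and instead wait on the SSO head status before triggering the LMTST, preserving ordering. A minimal sketch of that shape, assuming hypothetical demo_* names (only NIX_TX_VWQE_F and roc_sso_hws_head_wait() come from the patch):

#include <stdint.h>

#define DEMO_VWQE_F (1u << 14) /* stands in for NIX_TX_VWQE_F */

/* Hypothetical helpers; placeholders for the ROC/flow-control calls. */
static inline void demo_head_wait(uintptr_t base) { (void)base; }
static inline int demo_fc_full(void *txq, uint16_t pkts)
{ (void)txq; (void)pkts; return 0; }

static inline uint16_t
demo_xmit(void *txq, uint16_t pkts, uintptr_t base, const uint16_t flags)
{
	/* 'flags' is constant per instantiation, so both branches below
	 * are resolved at compile time.
	 */
	if (!(flags & DEMO_VWQE_F)) {
		if (demo_fc_full(txq, pkts))
			return 0; /* normal path: honor flow control */
	}

	/* ... build send descriptors into the LMT line ... */

	if (flags & DEMO_VWQE_F)
		demo_head_wait(base); /* event path: wait for SSO head */

	/* ... LDEOR/steor to trigger the LMTST ... */
	return pkts;
}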
On Mon, Jun 21, 2021 at 1:59 AM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add multi-segment Rx vector routine, form the primary mbufs using > vector path switch to scalar path when extracting segments. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394 Now that net/cnxk related changes merged to dpdk-next-net-mrvl/for-next-net, Could you rebase and send separate series based on dpdk-next-net-mrvl/for-next-net for net/cnxk related changes. > > v3 Changes: > - Spell check. > > drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ > drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ > drivers/net/cnxk/meson.build | 2 ++ > 7 files changed, 157 insertions(+), 44 deletions(-) > create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c > create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c > > diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c > index 5c956c06b4..3a9fd71309 100644 > --- a/drivers/net/cnxk/cn10k_rx.c > +++ b/drivers/net/cnxk/cn10k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. > - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. 
> + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h > index 1cc37cbaa0..5926ff7f46 100644 > --- a/drivers/net/cnxk/cn10k_rx.h > +++ b/drivers/net/cnxk/cn10k_rx.h > @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); > @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, > ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. 
> + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > new file mode 100644 > index 0000000000..04d1e46c82 > --- /dev/null > +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > @@ -0,0 +1,17 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. > + */ > + > +#include "cn10k_ethdev.h" > +#include "cn10k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ > + { \ > + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ > + (flags) | NIX_RX_MULTI_SEG_F); \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c > index 0acedd0a1f..d293d4eac3 100644 > --- a/drivers/net/cnxk/cn9k_rx.c > +++ b/drivers/net/cnxk/cn9k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. > - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. 
> + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h > index 10ef5c6905..5ae9e8195c 100644 > --- a/drivers/net/cnxk/cn9k_rx.h > +++ b/drivers/net/cnxk/cn9k_rx.h > @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + > @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, > nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. 
> + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > new file mode 100644 > index 0000000000..e46d8a4749 > --- /dev/null > +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. > + */ > + > +#include "cn9k_ethdev.h" > +#include "cn9k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ > + { \ > + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ > + (flags) | \ > + NIX_RX_MULTI_SEG_F); \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build > index 2071d0dcb2..aa8c7253fb 100644 > --- a/drivers/net/cnxk/meson.build > +++ b/drivers/net/cnxk/meson.build > @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', > 'cn9k_rx.c', > 'cn9k_rx_mseg.c', > 'cn9k_rx_vec.c', > + 'cn9k_rx_vec_mseg.c', > 'cn9k_tx.c', > 'cn9k_tx_mseg.c', > 'cn9k_tx_vec.c') > @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', > 'cn10k_rx.c', > 'cn10k_rx_mseg.c', > 'cn10k_rx_vec.c', > + 'cn10k_rx_vec_mseg.c', > 'cn10k_tx.c', > 'cn10k_tx_mseg.c', > 'cn10k_tx_vec.c') > -- > 2.17.1 >
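[Editorial sketch] One detail worth noting in the pick_rx_func() hunk quoted above: the trailing rte_mb() becomes rte_atomic_thread_fence(__ATOMIC_RELEASE), presumably because only store ordering is needed when publishing the newly selected burst function. A minimal sketch of that publish pattern in plain C11 atomics (demo_* names are illustrative; the real code stores into the ethdev burst pointer):

#include <stdatomic.h>
#include <stdint.h>

typedef uint16_t (*demo_rx_burst_t)(void *q, void **pkts, uint16_t n);

struct demo_dev {
	demo_rx_burst_t rx_burst; /* stands in for the rx burst pointer */
};

static void
demo_pick_rx_func(struct demo_dev *dev, demo_rx_burst_t fn)
{
	dev->rx_burst = fn;
	/* Release fence: the store above is ordered before any later
	 * store (e.g. a device-started flag) through which other lcores
	 * discover the new function pointer.
	 */
	atomic_thread_fence(memory_order_release);
}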
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi-segment Rx vector routine, form the primary mbufs using vector path switch to scalar path when extracting segments. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v4 Changes: - Split patches for easier merge. - Rebase on dpdk-next-net-mrvl. v3 Changes: - Spell check. drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ drivers/net/cnxk/meson.build | 2 ++ 7 files changed, 157 insertions(+), 44 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 5c956c06b..3a9fd7130 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 1cc37cbaa..5926ff7f4 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c new file mode 100644 index 000000000..04d1e46c8 --- /dev/null +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index 0acedd0a1..d293d4eac 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 10ef5c690..5ae9e8195 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c new file mode 100644 index 000000000..e46d8a474 --- /dev/null +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | \ + NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index 2071d0dcb..aa8c7253f 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx.c', 'cn9k_rx_mseg.c', 'cn9k_rx_vec.c', + 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', 'cn9k_tx_vec.c') @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx.c', 'cn10k_rx_mseg.c', 'cn10k_rx_vec.c', + 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', 'cn10k_tx_vec.c') -- 2.17.1
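[Editorial sketch] For reference, the SG word that nix_cqe_xtract_mseg() decodes packs up to three 16-bit segment sizes in its low 48 bits, with the segment count in bits 48..49; the hunks above peel one size per iteration with `sg >>= 16` (further segments, if any, continue through the trailing iova list, which this sketch omits). A standalone decode with illustrative values:

#include <stdint.h>
#include <stdio.h>

static void
demo_decode_sg(uint64_t sg)
{
	unsigned nb_segs = (sg >> 48) & 0x3; /* segments in this SG word */

	printf("nb_segs=%u\n", nb_segs);
	while (nb_segs--) {
		printf("seg len=%u\n", (unsigned)(sg & 0xFFFF));
		sg >>= 16; /* advance to the next segment size */
	}
}

int main(void)
{
	/* Two segments: 1500 bytes followed by 64 bytes. */
	demo_decode_sg((2ULL << 48) | (64ULL << 16) | 1500ULL);
	return 0;
}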
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in vector Rx burst function, use vector path for processing mbufs and finally switch to scalar when extracting timestamp. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_ethdev.c | 1 - drivers/net/cnxk/cn10k_rx.c | 5 +- drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_rx_vec.c | 3 - drivers/net/cnxk/cn9k_ethdev.c | 1 - drivers/net/cnxk/cn9k_rx.c | 5 +- drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_rx_vec.c | 3 - drivers/net/cnxk/cnxk_ethdev.h | 19 ++--- 9 files changed, 232 insertions(+), 53 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index b079edbd3..7caec6cf1 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 3a9fd7130..69e767ac3 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 5926ff7f4..d9572b19e 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 65ffa9784..93528a44f 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -11,9 +11,6 @@ struct rte_mbuf **rx_pkts, \ uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 994fdb7c3..115e67891 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index d293d4eac..7d9f1bd61 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 5ae9e8195..beb52f39d 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c index e61c2225c..ef5f771ef 100644 --- a/drivers/net/cnxk/cn9k_rx_vec.c +++ b/drivers/net/cnxk/cn9k_rx_vec.c @@ -9,9 +9,6 @@ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 67b1f4253..4eead0390 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -136,13 +136,12 @@ struct cnxk_eth_qconf { }; struct cnxk_timesync_info { + uint8_t rx_ready; + uint64_t rx_tstamp; uint64_t rx_tstamp_dynflag; + int tstamp_dynfield_offset; rte_iova_t tx_tstamp_iova; uint64_t *tx_tstamp; - uint64_t rx_tstamp; - int tstamp_dynfield_offset; - uint8_t tx_ready; - uint8_t rx_ready; } __plt_cache_aligned; struct cnxk_eth_dev { @@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf, static __rte_always_inline void cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, - struct cnxk_timesync_info *tstamp, bool ts_enable, + struct cnxk_timesync_info *tstamp, + const uint8_t ts_enable, const uint8_t mseg_enable, uint64_t *tstamp_ptr) { - if (ts_enable && - (mbuf->data_off == - RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) { - mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + if (ts_enable) { + if (!mseg_enable) { + mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + } /* Reading the rx timestamp inserted by CGX, viz at * starting of the packet data. -- 2.17.1
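[Editorial sketch] The NEON timestamp block above is easier to check against its scalar equivalent: with NIX_RX_OFFLOAD_TSTAMP_F set, the 8-byte big-endian timestamp that CGX places at the start of the packet data is trimmed from pkt_len/data_len, byte-reversed into the mbuf dynfield, and the PTP ol_flags are set only where the ptype matched RTE_PTYPE_L2_ETHER_TIMESYNC, with the Rx queue remembering the newest such timestamp. A hedged scalar restatement (demo_mbuf is illustrative, not the rte_mbuf layout):

#include <stdbool.h>
#include <stdint.h>

#define DEMO_TIMESYNC_RX_OFFSET 8 /* mirrors CNXK_NIX_TIMESYNC_RX_OFFSET */

struct demo_mbuf {
	uint32_t pkt_len;
	uint16_t data_len;
	uint64_t ol_flags;
	uint64_t ts_dynfield;    /* stands in for the timestamp dynfield */
	const uint8_t *data;     /* start of packet data */
};

static inline uint64_t
demo_be64(const uint8_t *p) /* like vrev64q_u8 on one 64-bit lane */
{
	uint64_t v = 0;
	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

static void
demo_rx_tstamp(struct demo_mbuf *m, bool ptype_is_timesync,
	       uint64_t ptp_olflags, uint64_t *rxq_latest)
{
	m->pkt_len -= DEMO_TIMESYNC_RX_OFFSET;   /* like the len_off sub */
	m->data_len -= DEMO_TIMESYNC_RX_OFFSET;
	m->ts_dynfield = demo_be64(m->data);     /* timestamp leads data */
	if (ptype_is_timesync) {
		m->ol_flags |= ptp_olflags;      /* IEEE1588 PTP/TMST */
		*rxq_latest = m->ts_dynfield;    /* rxq keeps the newest */
	}
}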
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable VLAN offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 3 +- drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_tx_vec.c | 3 +- drivers/net/cnxk/cn9k_tx.c | 3 +- drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 3 +- 6 files changed, 227 insertions(+), 38 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 18694dc70..05bc163a4 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8b1446f25..1e1697858 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_pkts_per_vec_brst(const uint16_t flags) { - RTE_SET_USED(flags); - /* We can pack up to 4 packets per LMTLINE if there are no offloads. */ - return 4 << ROC_LMT_LINES_PER_CORE_LOG2; + return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4) + << ROC_LMT_LINES_PER_CORE_LOG2; +} + +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line(const uint16_t flags) +{ + return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; } static __rte_always_inline uint64_t @@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags) static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { - const uint64_t dw_m1 = 0x7; + const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1; uint64_t data; - RTE_SET_USED(flags); /* This will be moved to addr area */ data = dw_m1; /* 15 vector sizes for single seg */ @@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0); sgdesc23_w0 = sgdesc01_w0; + /* Load command defaults into vector variables. 
*/ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + } + /* Get LMT base address and LMT ID as lcore id */ ROC_LMT_BASE_ID_GET(laddr, lmt_id); left = pkts; @@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); - lnum += 1; + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + lnum += 1; + } else { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); + lnum += 1; + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 7453f3bc9..beb5c649b 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index b80260607..4b43cdaff 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 1899d6670..d5715bb52 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -552,10 +552,13 @@ 
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; senddesc01_w1 = vdupq_n_u64(0); senddesc23_w1 = senddesc01_w1; - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); - sgdesc23_w0 = sgdesc01_w0; + + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); + sgdesc23_w0 = sgdesc01_w0; + } else { + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sgdesc23_w0 = sgdesc01_w0; + } for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) { /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ @@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd1[0]); - vst1q_u64(lmt_addr + 4, cmd0[1]); - vst1q_u64(lmt_addr + 6, cmd1[1]); - vst1q_u64(lmt_addr + 8, cmd0[2]); - vst1q_u64(lmt_addr + 10, cmd1[2]); - vst1q_u64(lmt_addr + 12, cmd0[3]); - vst1q_u64(lmt_addr + 14, cmd1[3]); - lmt_status = roc_lmt_submit_ldeor(io_addr); - } while (lmt_status == 0); + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* With ext header in the command we can no longer send + * all 4 packets together since LMTLINE is 128bytes. + * Split and Tx twice. + */ + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + do { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } else { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index a6e7c9e54..5842facb5 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ -- 2.17.1
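[Editorial sketch] The VLAN hunks above OR both TCIs into send-ext W1 unconditionally and use the ol_flags test masks only to set the insert-enable bits, which keeps the vector path branch-free. Restated as a scalar helper, with field positions taken from the patch (VLAN0 is the outer tag; the bool arguments stand in for the PKT_TX_QINQ/PKT_TX_VLAN tests):

#include <stdbool.h>
#include <stdint.h>

#define DEMO_W1_VLAN_CLR 0x3FFFF00FFFF00ULL /* both TCIs + both enables */

static inline uint64_t
demo_ext_w1_vlan(uint64_t w1, uint16_t outer_tci, uint16_t inner_tci,
		 bool has_qinq, bool has_vlan)
{
	w1 &= ~DEMO_W1_VLAN_CLR;          /* like the vbicq_u64() reset */
	w1 |= (uint64_t)outer_tci << 8;   /* VLAN0 TCI, bits 8..23 */
	w1 |= (uint64_t)inner_tci << 32;  /* VLAN1 TCI, bits 32..47 */
	if (has_qinq)
		w1 |= 1ULL << 48;         /* enable VLAN0 insertion */
	if (has_vlan)
		w1 |= 1ULL << 49;         /* enable VLAN1 insertion */
	return w1;
}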
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in the vector Tx burst function. Since we can no longer use a single LMT line for a burst of 4, split the LMT line into two and transmit twice. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 4 +- drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++----- drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 4 +- drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 192 insertions(+), 40 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 05bc163a4..c4c3e6570 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 1e1697858..8af6799ff 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_tx_dwords_per_line(const uint16_t flags) { - return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; + return (flags & NIX_TX_NEED_EXT_HDR) ? + ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : + 8; } static __rte_always_inline uint64_t @@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = sendext01_w0; sendext01_w1 = vdupq_n_u64(12 | 12U << 24); sendext23_w1 = sendext01_w1; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]); + sendmem23_w1 = sendmem01_w1; + } } /* Get LMT base address and LMT ID as lcore id */ @@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB*/ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default.
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted + * mask as we need not make any changes to default cmd + * value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); - lnum += 1; - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]); + } else { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + } lnum += 1; } else { /* Store the prepared send desc to LMT lines */ diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index beb5c649b..0b4a4c7ba 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 4b43cdaff..c32681ed4 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index d5715bb52..cb574a1c1 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); sgdesc23_w0 = sgdesc01_w0; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]); + sendmem23_w1 = sendmem01_w1; + } } else { sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); sgdesc23_w0 = sgdesc01_w0; @@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB*/ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default. 
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted + * mask as we need not make any changes to default cmd + * value. + */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when + * TSTMP is disabled. + */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * Split and Tx twice.
*/ do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd2[0]); - vst1q_u64(lmt_addr + 4, cmd1[0]); - vst1q_u64(lmt_addr + 6, cmd0[1]); - vst1q_u64(lmt_addr + 8, cmd2[1]); - vst1q_u64(lmt_addr + 10, cmd1[1]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd3[0]); + vst1q_u64(lmt_addr + 8, cmd0[1]); + vst1q_u64(lmt_addr + 10, cmd2[1]); + vst1q_u64(lmt_addr + 12, cmd1[1]); + vst1q_u64(lmt_addr + 14, cmd3[1]); + } else { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); do { - vst1q_u64(lmt_addr, cmd0[2]); - vst1q_u64(lmt_addr + 2, cmd2[2]); - vst1q_u64(lmt_addr + 4, cmd1[2]); - vst1q_u64(lmt_addr + 6, cmd0[3]); - vst1q_u64(lmt_addr + 8, cmd2[3]); - vst1q_u64(lmt_addr + 10, cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd3[2]); + vst1q_u64(lmt_addr + 8, cmd0[3]); + vst1q_u64(lmt_addr + 10, cmd2[3]); + vst1q_u64(lmt_addr + 12, cmd1[3]); + vst1q_u64(lmt_addr + 14, cmd3[3]); + } else { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); } else { diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 5842facb5..9ade66db2 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
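The SEND_MEM handling above reduces to a simple per-packet rewrite: the default command words loaded from txq->cmd carry the SETTSTMP algorithm and the Tx timestamp address, and for packets without PKT_TX_IEEE1588_TMST the inverted test mask switches the algorithm to SUB (bit 59) and moves the destination 8 bytes away, so the hardware write lands in a scratch slot instead of the timestamp. Below is a scalar sketch of that idea; only the bit position and the 8-byte offset are taken from the patch, the helper itself is illustrative.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SEND_MEM_ALG_SUB (1ULL << 59) /* SETTSTMP when this bit is clear */

static void
sendmem_fixup(uint64_t *w0, uint64_t *w1, int wants_tstamp)
{
	/*
	 * All-ones when timestamping is NOT requested, zero otherwise;
	 * mirrors vmvnq_u32(vtstq_u64(olf, ...)) in the vector code.
	 */
	uint64_t no_ts = wants_tstamp ? 0 : ~0ULL;

	*w0 |= no_ts & SEND_MEM_ALG_SUB; /* stop overwriting the timestamp */
	*w1 += no_ts & 0x8;		 /* park the write 8 bytes away */
}

int
main(void)
{
	uint64_t w0 = 0, w1 = 0x1000; /* placeholder defaults */

	sendmem_fixup(&w0, &w1, 0);
	printf("w0=0x%" PRIx64 " w1=0x%" PRIx64 "\n", w0, w1);
	return 0;
}

This keeps the hot loop branch-free: every packet still emits a SEND_MEM subdescriptor, but only PTP packets update the real timestamp location.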
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable TSO offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 2 +- drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index c4c3e6570..d06879163 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8af6799ff..26797581e 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + const uint64_t flags, const uint64_t lso_tun_fmt) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + uint8_t shift = is_udp_tun ? 32 : 0; + + shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4); + shift += (!!(ol_flags & PKT_TX_IPV6) << 3); + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + + w0->lso_format = (lso_tun_fmt >> shift); + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0); senddesc23_w0 = senddesc01_w0; @@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. 
*/ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + const uint64_t lso_fmt = txq->lso_tun_fmt; + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. */ + cn10k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags, lso_fmt); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 0b4a4c7ba..34e373750 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index c32681ed4..735e21cc6 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index cb574a1c1..dca732a9f 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + uint64_t flags) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + 
w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + w0->lso_format += is_udp_tun ? 2 : 6; + + w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1; + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } + /* Lets commit any changes in the packet here as no further changes * to the packet will be done unless no fast free is enabled. */ @@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. */ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. 
*/ + cn9k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags); + + cn9k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 9ade66db2..56a3e2514 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
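A detail worth spelling out in cn9k_nix_prepare_tso()/cn10k_nix_prepare_tso() above is the branchless choice of the LSO start byte: mask = -(!w1->il3type) is all-ones when no inner L3 header is present (plain TSO) and zero for tunnelled packets, so the outer or inner L4 pointer is selected without a branch. A standalone sketch of the idiom follows; the field names mirror the patch, but the descriptor union layout is not reproduced.

#include <stdint.h>
#include <stdio.h>

static uint16_t
lso_start_byte(uint16_t ol4ptr, uint16_t il4ptr, uint8_t il3type,
	       uint16_t l4_len)
{
	/* ~0ULL when there is no inner L3 (plain TSO), 0 for tunnel TSO. */
	uint64_t mask = -(uint64_t)(!il3type);

	return (mask & ol4ptr) + (~mask & il4ptr) + l4_len;
}

int
main(void)
{
	/* Plain TSO: LSO starts after the outer L4 header. */
	printf("plain:  %u\n", lso_start_byte(34, 0, 0, 20));
	/* Tunnel TSO: the inner L4 pointer is used instead. */
	printf("tunnel: %u\n", lso_start_byte(34, 84, 1, 20));
	return 0;
}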
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi segment Tx vector routine. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 20 +- drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++-- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/cn9k_tx.c | 20 +- drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++- drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/meson.build | 6 +- 7 files changed, 709 insertions(+), 45 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index d06879163..1f30bab59 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 26797581e..532b53b31 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -42,6 +42,13 @@ } \ } while (0) +/* Encoded number of segments to number of dwords macro, each value of nb_segs + * is encoded as 4bits. + */ +#define NIX_SEGDW_MAGIC 0x76654432210ULL + +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) + #define LMT_OFF(lmt_addr, lmt_num, offset) \ (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags) return data; } +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags) +{ + return ((flags & NIX_TX_NEED_EXT_HDR) ? + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 
8 : 6 : + 4); +} + static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; +} + +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint8_t segdw, + const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return; + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + sh.sizem1 = segdw - 1; + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline uint8_t +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, + uint64x2_t *cmd1, uint64x2_t *cmd2, + uint64x2_t *cmd3, uint8_t *segdw, + uint64_t *lmt_addr, __uint128_t *data128, + uint8_t *shift, const uint16_t flags) +{ + uint8_t j, off, lmt_used; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. 
*/ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + + *data128 |= ((__uint128_t)7) << *shift; + shift += 3; + + return 1; + } + } + + lmt_used = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. */ + if ((segdw[j] + segdw[j + 1]) <= 8) { + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + /* TSTAMP takes 4 each, no segs. */ + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + vst1q_u64(lmt_addr + 6, cmd3[j]); + + vst1q_u64(lmt_addr + 8, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12, cmd1[j + 1]); + vst1q_u64(lmt_addr + 14, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + /* EXT header take 3 each, space for 2 segs.*/ + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 3; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 12 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + off = segdw[j] - 2; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 8 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + } + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1) + << *shift; + *shift += 3; + j += 2; + } else { + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 4; + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + } + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift; + *shift += 3; + j++; + } + lmt_used++; + lmt_addr += 16; + } + + return lmt_used; +} + static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, 
struct rte_mbuf **tx_pkts, uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sendmem01_w0, sendmem23_w0; uint64x2_t sendmem01_w1, sendmem23_w1; + uint8_t segdw[NIX_DESCS_PER_LOOP + 1]; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t ltypes01, ltypes23; uint64x2_t xtmp128, ytmp128; uint64x2_t xmask01, xmask23; - uint8_t lnum; + uint8_t lnum, shift; + union wdata { + __uint128_t data128; + uint64_t data[2]; + } wd; NIX_XMIT_FC_OR_RETURN(txq, pkts); @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, burst = left > cn10k_nix_pkts_per_vec_brst(flags) ? cn10k_nix_pkts_per_vec_brst(flags) : left; + if (flags & NIX_TX_MULTI_SEG_F) { + wd.data128 = 0; + shift = 16; + } lnum = 0; + for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + struct rte_mbuf *m = tx_pkts[j]; + /* Get dwords based on nb_segs. */ + segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs); + /* Add dwords based on offloads. */ + segdw[j] += 1 + /* SEND HDR */ + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + /* Check if there are enough LMTLINES for this loop */ + if (lnum + 4 > 32) { + uint8_t ldwords_con = 0, lneeded = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + ldwords_con += segdw[j]; + if (ldwords_con > 8) { + lneeded += 1; + ldwords_con = segdw[j]; + } + } + lneeded += 1; + if (lnum + lneeded > 32) { + burst = i; + break; + } + } + } /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ senddesc01_w0 = vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF)); @@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, (void **)&mbuf3, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + segdw[4] = 8; + j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1, + cmd2, cmd3, segdw, + (uint64_t *) + LMT_OFF(laddr, lnum, + 0), + &wd.data128, &shift, + flags); + lnum += j; + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { vst1q_u64(LMT_OFF(laddr,
lnum, 0), cmd0[0]); @@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] >>= 16; + /* Trigger LMTST */ if (lnum > 16) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= (15ULL << 12); - data |= (uint64_t)lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= (15ULL << 12); + wd.data[0] |= (uint64_t)lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 17)) << 12; - data |= (uint64_t)(lmt_id + 16); + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[1] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[1] & 0x7) << 4; + wd.data[1] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[1] <<= 16; + + wd.data[1] |= ((uint64_t)(lnum - 17)) << 12; + wd.data[1] |= (uint64_t)(lmt_id + 16); /* STEOR1 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[1], pa); } else if (lnum) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 1)) << 12; - data |= lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= ((uint64_t)(lnum - 1)) << 12; + wd.data[0] |= lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); } left -= burst; @@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (left) goto again; - if (unlikely(scalar)) - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd, - flags); + if (unlikely(scalar)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + scalar, cmd, flags); + else + pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, + cmd, flags); + } return pkts; } @@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \ - void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ NIX_TX_FASTPATH_MODES #undef T diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c new file mode 100644 index 000000000..1fad81dba --- /dev/null +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn10k_nix_xmit_pkts_vector( \ + tx_queue, tx_pkts, pkts, cmd, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 735e21cc6..763f9a14f 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index dca732a9f..ed65cd351 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + uint64_t segdw; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; + segdw = (uint64_t *)slist - (uint64_t *)&cmd[0]; + + segdw 
+= 2; + /* Roundup extra dwords to multiple of 2 */ + segdw = (segdw >> 1) + (segdw & 0x1); + /* Default dwords */ + segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + sh->sizem1 = segdw - 1; + + return segdw; +} + +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + uint8_t ret; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + return ret; +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline void +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1, + uint64x2_t *cmd2, uint64x2_t *cmd3, + uint8_t *segdw, + uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2], + uint64_t *lmt_addr, rte_iova_t io_addr, + const uint32_t flags) +{ + uint64_t lmt_status; + uint8_t j, off; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. */ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + return; + } + } + + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. 
*/ + if ((segdw[j] + segdw[j + 1]) <= 8) { +again0: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + + vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 14 + off, + slist[j + 1], segdw[j + 1] - 4); + off += ((segdw[j + 1] - 4) << 1); + vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 12 + off, + slist[j + 1], segdw[j + 1] - 3); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 8 + off, + slist[j + 1], segdw[j + 1] - 2); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again0; + j += 2; + } else { +again1: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again1; + j += 1; + } + } +} + static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * cnxk_nix_prefree_seg are written before LMTST. 
*/ rte_io_wmb(); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint64_t seg_list[NIX_DESCS_PER_LOOP] + [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; + uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; + + /* Build mseg list for each packet individually. */ + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j], + seg_list[j], &cmd0[j], + &cmd1[j], flags); + segdw[4] = 8; + + /* Commit all changes to mbuf before LMTST. */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + rte_io_wmb(); + + cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, + segdw, seg_list, + lmt_addr, io_addr, + flags); + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* With ext header in the command we can no longer send * all 4 packets together since LMTLINE is 128bytes. * Split and Tx twice. @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } - if (unlikely(pkts_left)) - pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, - flags); + if (unlikely(pkts_left)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + pkts_left, cmd, flags); + else + pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, + cmd, flags); + } return pkts; } @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c new file mode 100644 index 000000000..0256efd45 --- /dev/null +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ + (flags) | \ + NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index aa8c7253f..361f7ce84 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', - 'cn9k_tx_vec.c') + 'cn9k_tx_vec.c', + 'cn9k_tx_vec_mseg.c') # CN10K sources += files('cn10k_ethdev.c', 'cn10k_rte_flow.c', @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', - 'cn10k_tx_vec.c') + 'cn10k_tx_vec.c', + 'cn10k_tx_vec_mseg.c') deps += ['bus_pci', 'cryptodev', 'eventdev', 'security'] deps += ['common_cnxk', 'mempool_cnxk'] -- 2.17.1
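The NIX_SEGDW_MAGIC constant introduced in cn10k_tx.h above is a packed lookup table: nibble n of 0x76654432210 gives the space consumed by the scatter/gather list of an n-segment chain, already rounded up (each SG subdescriptor covers up to three buffer pointers). Reading the surrounding code, the unit appears to be 16-byte chunks of the LMT line, which is why four packets fit in one line only when segdw[0..3] sums to at most 8; that interpretation is our reading of the patch, not documented behaviour. A standalone decoder for inspecting the table:

#include <stdint.h>
#include <stdio.h>

/* Copied from the patch above. */
#define NIX_SEGDW_MAGIC		0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int
main(void)
{
	unsigned int segs;

	/* Print the SG-area size for mbuf chains of 1..9 segments. */
	for (segs = 1; segs <= 9; segs++)
		printf("nb_segs=%u -> segdw=%u\n", segs,
		       (unsigned int)NIX_NB_SEGS_TO_SEGDW(segs));
	return 0;
}

The send header, extension header and timestamp subdescriptors are then added on top per packet, matching the segdw[j] += 1 + !!(EXT) + !!(TSTAMP) adjustment in the patch.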
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize the cn10k workslot fastpath structure to fit in a 64B cacheline. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v4 Changes: - Split patches for easier merge. v3 Changes: - Spell check. doc/guides/eventdevs/cnxk.rst | 28 ++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 540 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800c..b7e82c127 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Full Rx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +97,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across the ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +173,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to an +event device causes back pressure to be asserted only on the first +ethernet device. +Back pressure is therefore automatically disabled when the same mempool is +used for all the ethernet devices connected to an event device; to override +this, applications can use the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device is recommended when they are +connected to an event device. diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 31e49e1a5..3892c8017 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,6 +60,11 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. +* **Added support for the Marvell CN9K and CN10K event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk.
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb6902795..76613fe84 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f..f17eba416 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df44..9dc0c88a6 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e5..cb1ce4b6f 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76..2060c8fe8 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } +static int 
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c6..5dbae275b 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd..c7250bf9e 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); 
} while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417ea..072800c24 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb..f5a440146 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 
+5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a7..cfd7fb971 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62..b65d725f5 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const 
struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c1..24bfd985e 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, + RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, i, port, &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + 
rxq_sp->qconf.mp->pool_id, true,
+				      dev->force_ena_bp);
+		}
+	} else {
+		rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+		rxq_sp = rxq_sp - 1;
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure(
+			(struct rte_eventdev *)(uintptr_t)event_dev);
+		rc |= cnxk_sso_rxq_enable(
+			cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+			&queue_conf->ev,
+			!!(queue_conf->rx_queue_flags &
+			   RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+		rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+				      rxq_sp->qconf.mp->pool_id, true,
+				      dev->force_ena_bp);
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+	return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev,
+			      int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+			rxq_sp = eth_dev->data->rx_queues[i];
+			rxq_sp = rxq_sp - 1;
+			rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i);
+			rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+					      rxq_sp->qconf.mp->pool_id, false,
+					      dev->force_ena_bp);
+		}
+	} else {
+		rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+		rxq_sp = rxq_sp - 1;
+		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+				      rxq_sp->qconf.mp->pool_id, false,
+				      dev->force_ena_bp);
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+			eth_dev->data->port_id, rx_queue_id);
+
+	return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+			  const struct rte_eth_dev *eth_dev)
+{
+	RTE_SET_USED(event_dev);
+	RTE_SET_USED(eth_dev);
+
+	return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+			 const struct rte_eth_dev *eth_dev)
+{
+	RTE_SET_USED(event_dev);
+	RTE_SET_USED(eth_dev);
+
+	return 0;
+}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a..eda562f5b 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
         'cnxk_tim_worker.c',
 )

-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+    if cc.has_argument(flag)
+        cflags += flag
+    endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
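For reference, a minimal application-side sketch (not part of the patch) of how the ops added above are exercised through the public rte_event_eth_rx_adapter API; the adapter id, event queue id, scheduling type, and port configuration are illustrative assumptions:

#include <errno.h>
#include <string.h>
#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
connect_eth_to_sso(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port_id,
		   struct rte_event_port_conf *ev_port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	/* event/cnxk reports INTERNAL_PORT for net/cnxk ports, so the
	 * SSO injects packets in hardware and no service core is used. */
	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port_id, &caps);
	if (rc)
		return rc;
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, ev_port_conf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = 0;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;

	/* rx_queue_id of -1 maps every Rx queue of the port to event
	 * queue 0, the N:1 mapping described in the documentation. */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port_id, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adptr_id);
}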
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe8..ba7d95fff 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275b..c71aa3732 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e..b724083ca 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 000000000..36ec454cc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 000000000..29ecc551c --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 000000000..c8524a27b --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c24..e386cb784 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98d..538bc4b0b 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a440146..c01c00e1d 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 000000000..51ccaf4ec --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 000000000..4e2801459 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 000000000..9713d1ef0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 000000000..709fa2d9e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 000000000..d50e1cf83 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 000000000..a0508fdf0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f5..9d5d2d033 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b..c5c1c0ee8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
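For reference, a minimal application-side sketch of driving the Rx adapter path added above: query the adapter capabilities, create the adapter, and map the port's Rx queues to one event queue (the N:1 mapping listed in the feature doc). The adapter id, port-conf thresholds and helper name here are illustrative assumptions, not part of the patch.

#include <errno.h>
#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>

static int
setup_rx_adapter(uint8_t evdev_id, uint16_t eth_port_id, uint8_t ev_qid)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf = {
		.new_event_threshold = 4096,
		.dequeue_depth = 1,
		.enqueue_depth = 1,
	};
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port_id, &caps);
	if (rc)
		return rc;
	/* This series targets the HW-managed (internal port) path. */
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	rc = rte_event_eth_rx_adapter_create(0, evdev_id, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_qid;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	/* rx_queue_id of -1 maps every Rx queue of the port to ev_qid. */
	rc = rte_event_eth_rx_adapter_queue_add(0, eth_port_id, -1, &qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(0);
}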
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 21 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++ 6 files changed, 339 insertions(+), 6 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c127..6fdccc2ab 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017..80ff93269 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff..8a9b04a3d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = 
cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784..bdc563223 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int 
rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d033..458fdc8d9 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -86,9 +87,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +119,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +147,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 24bfd985e..548d7b81c 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -5,6 +5,8 @@ #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" +#define CNXK_SSO_SQB_LIMIT (0x180) + void cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, uint32_t event_type) @@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + uint16_t 
sqb_limit;
+
+	sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs);
+	return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit);
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+			    uint16_t eth_port_id, uint16_t tx_queue_id,
+			    void *txq)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t max_port_id = dev->max_port_id;
+	uint64_t *txq_data = dev->tx_adptr_data;
+
+	if (txq_data == NULL || eth_port_id > max_port_id) {
+		max_port_id = RTE_MAX(max_port_id, eth_port_id);
+		txq_data = rte_realloc_socket(
+			txq_data,
+			(sizeof(uint64_t) * (max_port_id + 1) *
+			 RTE_MAX_QUEUES_PER_PORT),
+			RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+		if (txq_data == NULL)
+			return -ENOMEM;
+	}
+
+	((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+		 txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+	dev->max_port_id = max_port_id;
+	dev->tx_adptr_data = txq_data;
+	return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev,
+			      int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_sq *sq;
+	int i, ret;
+	void *txq;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+			txq = eth_dev->data->tx_queues[i];
+			sq = &cnxk_eth_dev->sqs[i];
+			cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+			ret = cnxk_sso_updt_tx_queue_data(
+				event_dev, eth_dev->data->port_id, i, txq);
+			if (ret < 0)
+				return ret;
+		}
+	} else {
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+		ret = cnxk_sso_updt_tx_queue_data(
+			event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+		if (ret < 0)
+			return ret;
+	}
+
+	dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+
+	return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev,
+			      int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct roc_nix_sq *sq;
+	int i, ret;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+			sq = &cnxk_eth_dev->sqs[i];
+			cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+			ret = cnxk_sso_updt_tx_queue_data(
+				event_dev, eth_dev->data->port_id, i, NULL);
+			if (ret < 0)
+				return ret;
+		}
+	} else {
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+		ret = cnxk_sso_updt_tx_queue_data(
+			event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
--
2.17.1
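The txq lookup table built by cnxk_sso_updt_tx_queue_data() above is one flat allocation of (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT entries, indexed through a pointer-to-array cast so the fast path resolves a (port, queue) pair with a single computed offset rather than a two-level pointer chase. A self-contained sketch of that layout, with illustrative sizes and a placeholder queue pointer:

#include <stdint.h>
#include <stdlib.h>

#define MAX_QUEUES_PER_PORT 64 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

int
main(void)
{
	uint16_t max_port_id = 3, port = 2, queue = 5;
	uint64_t *txq_data = calloc((max_port_id + 1) * MAX_QUEUES_PER_PORT,
				    sizeof(uint64_t));
	void *txq = (void *)0xdeadbeefUL; /* placeholder queue pointer */

	if (txq_data == NULL)
		return -1;
	/* Store: the same cast used in cnxk_sso_updt_tx_queue_data(). */
	((uint64_t(*)[MAX_QUEUES_PER_PORT])txq_data)[port][queue] =
		(uint64_t)txq;
	/* Fast-path load: one multiply-add into the flat table. */
	const uint64_t(*tbl)[MAX_QUEUES_PER_PORT] =
		(const uint64_t(*)[MAX_QUEUES_PER_PORT])txq_data;
	return tbl[port][queue] == (uint64_t)txq ? 0 : 1;
}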
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/meson.build | 6 ++ 11 files changed, 417 insertions(+) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3d..e462f770c 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083ca..3c90c8500 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ -251,4 +252,70 @@ uint16_t __rte_hot 
cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 000000000..f9968ac0d --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 000000000..a24fc42e5 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index bdc563223..af97020f2 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & 
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1d..5aa053c58 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 000000000..92e2981f0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 000000000..dfb574cf9 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 000000000..3df649c0c --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 000000000..0efe29113 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8..13e0634e8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
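A minimal sketch of the application side of the Tx fast path selected above: create the Tx adapter, add the port's Tx queues, then tag each mbuf with its destination queue before handing the event to the adapter enqueue, which with RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT resolves to the txa_enqueue function chosen in cn9k/cn10k_sso_fp_fns_set(). Adapter id, thresholds and helper names are illustrative assumptions:

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>
#include <rte_mbuf.h>

static int
setup_tx_adapter(uint8_t evdev_id, uint16_t eth_port_id)
{
	struct rte_event_port_conf pconf = {
		.new_event_threshold = 4096,
		.dequeue_depth = 1,
		.enqueue_depth = 1,
	};
	int rc;

	rc = rte_event_eth_tx_adapter_create(0, evdev_id, &pconf);
	if (rc)
		return rc;
	/* tx_queue_id of -1 adds every Tx queue of the port. */
	rc = rte_event_eth_tx_adapter_queue_add(0, eth_port_id, -1);
	if (rc)
		return rc;
	return rte_event_eth_tx_adapter_start(0);
}

/* Worker side: tag the mbuf with its destination Tx queue (queue 0 here,
 * as an example), then hand the event to the adapter enqueue. */
static inline uint16_t
worker_send(uint8_t evdev_id, uint8_t ev_port, struct rte_event *ev)
{
	rte_event_eth_tx_adapter_txq_set(ev->mbuf, 0);
	return rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, ev, 1, 0);
}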
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab..0297cd3d5 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c..e85fa4785 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 458fdc8d9..3783e0c95 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -96,6 +96,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 548d7b81c..c4c4f5a7f 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead0390..2528b3cda 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev 
*eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
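To make the new control path concrete, here is a minimal sketch that sizes an event vector mempool from the reported limits and enables vectorization on all Rx queues of a port, using the 21.08-era rte_event_eth_rx_adapter API. It assumes the Rx adapter rx_adptr_id already exists and that its queues were added with RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR set in rx_queue_flags; the pool size of 8192 is an arbitrary illustration:

#include <errno.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>

static int
app_enable_rx_vectorization(uint8_t dev_id, uint8_t rx_adptr_id,
			    uint16_t eth_port)
{
	struct rte_event_eth_rx_adapter_event_vector_config vec_conf;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	struct rte_mempool *vmp;
	int rc;

	rc = rte_event_eth_rx_adapter_vector_limits_get(dev_id, eth_port,
							&limits);
	if (rc)
		return rc;

	/* cn10k sets limits.log2_sz, so valid sizes are powers of two in
	 * [min_sz, max_sz]; timeouts are multiples of min_timeout_ns.
	 */
	vmp = rte_event_vector_pool_create("rx_vec_pool", 8192, 0,
					   limits.max_sz, rte_socket_id());
	if (vmp == NULL)
		return -ENOMEM;

	vec_conf.vector_sz = limits.max_sz;
	vec_conf.vector_timeout_ns = limits.min_timeout_ns;
	vec_conf.vector_mp = vmp;

	/* An rx_queue_id of -1 applies the config to every Rx queue. */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		rx_adptr_id, eth_port, -1, &vec_conf);
}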
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269..11ccc9bcb 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c8500..7a48a6b17 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e..a506a867c 
100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f..166735ad5 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c8..1f44ddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
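The net effect of this fastpath is that a worker dequeues a single RTE_EVENT_TYPE_ETHDEV_VECTOR event carrying up to vector_sz ready mbufs. A hedged sketch of the consuming side follows; done and process_pkt() are application-side assumptions:

#include <stdbool.h>
#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void process_pkt(struct rte_mbuf *m); /* app-defined */

static void
app_worker(uint8_t dev_id, uint8_t ev_port, volatile bool *done)
{
	struct rte_event ev;
	uint16_t i;

	while (!*done) {
		if (!rte_event_dequeue_burst(dev_id, ev_port, &ev, 1, 0))
			continue;

		if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			/* cn10k_process_vwqe() already converted each VWQE
			 * entry into a ready rte_mbuf in ev.vec->mbufs[].
			 */
			struct rte_event_vector *vec = ev.vec;

			for (i = 0; i < vec->nb_elem; i++)
				process_pkt(vec->mbufs[i]);

			/* Return the vector container to its mempool. */
			rte_mempool_put(rte_mempool_from_obj(vec), vec);
		} else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
			process_pkt(ev.mbuf);
		}
	}
}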
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath; integrate the event vector Tx routine into the Tx burst path. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 1 + doc/guides/rel_notes/release_21_08.rst | 2 +- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++---- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 12 files changed, 167 insertions(+), 53 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 0297cd3d5..53560d383 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are: - Full Rx/Tx offload support defined through ethdev queue configuration. - HW managed event vectorization on CN10K for packets enqueued from ethdev to eventdev, configurable per Rx queue in the Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 11ccc9bcb..9e49cb27d 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,7 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. - * Add support for event vectorization for Rx adapter. + * Add support for event vectorization for Rx/Tx adapter.
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8..316c6ccd5 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785..6f37c5bd2 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17..9cc099206 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 5aa053c58..ef1e83741 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae16..945132b74 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - uint64_t 
swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); -#else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) - ; -#endif -} #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59..0e1276c60 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 532b53b31..d2a24120e 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1972,6 +1986,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2028,10 +2045,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2040,13 +2058,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f675472..4ea4c8a4e 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e373750..a0350496a 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dba..7f98f79b9 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
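For the Tx direction, the meta & BIT(31) test in cn10k_sso_hws_event_tx() above checks the vector's attr_valid flag: when every mbuf in a vector targets one (port, queue) pair the driver issues a single cn10k_nix_xmit_pkts_vector() call, otherwise it falls back to cn10k_sso_vwqe_split_tx(). A minimal sketch of handing such a vector to the Tx adapter, assuming the vector was produced earlier and the adapter is already started (the driver frees the vector back to its mempool after transmission):

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

static uint16_t
app_tx_event_vector(uint8_t dev_id, uint8_t ev_port,
		    struct rte_event_vector *vec, uint16_t tx_port,
		    uint16_t tx_queue)
{
	struct rte_event ev;

	/* attr_valid sets BIT(31) of the vector's first word; port and
	 * queue land in the upper 32 bits, which the driver extracts
	 * with meta >> 32 and meta >> 48.
	 */
	vec->attr_valid = 1;
	vec->port = tx_port;
	vec->queue = tx_queue;

	ev.event = 0;
	ev.event_type = RTE_EVENT_TYPE_CPU_VECTOR;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.vec = vec;

	return rte_event_eth_tx_adapter_enqueue(dev_id, ev_port, &ev, 1, 0);
}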
On Tue, Jun 29, 2021 at 01:11:43AM +0530, pbhagavatula@marvell.com wrote: > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add multi segment Tx vector routine. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > drivers/net/cnxk/cn10k_tx.c | 20 +- > drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++-- > drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++ > drivers/net/cnxk/cn9k_tx.c | 20 +- > drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++- > drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++ > drivers/net/cnxk/meson.build | 6 +- > 7 files changed, 709 insertions(+), 45 deletions(-) > create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c > create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c > > diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c > index d06879163..1f30bab59 100644 > --- a/drivers/net/cnxk/cn10k_tx.c > +++ b/drivers/net/cnxk/cn10k_tx.c > @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) > #undef T > }; > > - if (dev->scalar_ena) > + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name, > + > + NIX_TX_FASTPATH_MODES > +#undef T > + }; > + > + if (dev->scalar_ena) { > pick_tx_func(eth_dev, nix_eth_tx_burst); > - else > + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); > + } else { > pick_tx_func(eth_dev, nix_eth_tx_vec_burst); > - > - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); > + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); > + } > > rte_mb(); > } > diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h > index 26797581e..532b53b31 100644 > --- a/drivers/net/cnxk/cn10k_tx.h > +++ b/drivers/net/cnxk/cn10k_tx.h > @@ -42,6 +42,13 @@ > } \ > } while (0) > > +/* Encoded number of segments to number of dwords macro, each value of nb_segs > + * is encoded as 4bits. > + */ > +#define NIX_SEGDW_MAGIC 0x76654432210ULL > + > +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) > + > #define LMT_OFF(lmt_addr, lmt_num, offset) \ > (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) > > @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags) > return data; > } > > +static __rte_always_inline uint8_t > +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags) > +{ > + return ((flags & NIX_TX_NEED_EXT_HDR) ? > + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 
8 : 6 : > + 4); > +} > + > static __rte_always_inline uint64_t > cn10k_nix_tx_steor_vec_data(const uint16_t flags) > { > @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, > } > } > > +static __rte_always_inline void > +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, > + union nix_send_hdr_w0_u *sh, > + union nix_send_sg_s *sg, const uint32_t flags) > +{ > + struct rte_mbuf *m_next; > + uint64_t *slist, sg_u; > + uint16_t nb_segs; > + int i = 1; > + > + sh->total = m->pkt_len; > + /* Clear sg->u header before use */ > + sg->u &= 0xFC00000000000000; > + sg_u = sg->u; > + slist = &cmd[0]; > + > + sg_u = sg_u | ((uint64_t)m->data_len); > + > + nb_segs = m->nb_segs - 1; > + m_next = m->next; > + > + /* Set invert df if buffer is not to be freed by H/W */ > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) > + sg_u |= (cnxk_nix_prefree_seg(m) << 55); > + /* Mark mempool object as "put" since it is freed by NIX */ > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + if (!(sg_u & (1ULL << 55))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + > + m = m_next; > + /* Fill mbuf segments */ > + do { > + m_next = m->next; > + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); > + *slist = rte_mbuf_data_iova(m); > + /* Set invert df if buffer is not to be freed by H/W */ > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) > + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); > + /* Mark mempool object as "put" since it is freed by NIX > + */ > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + if (!(sg_u & (1ULL << (i + 55)))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + slist++; > + i++; > + nb_segs--; > + if (i > 2 && nb_segs) { > + i = 0; > + /* Next SG subdesc */ > + *(uint64_t *)slist = sg_u & 0xFC00000000000000; > + sg->u = sg_u; > + sg->segs = 3; > + sg = (union nix_send_sg_s *)slist; > + sg_u = sg->u; > + slist++; > + } > + m = m_next; > + } while (nb_segs); > + > + sg->u = sg_u; > + sg->segs = i; > +} > + > +static __rte_always_inline void > +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, > + uint64x2_t *cmd1, const uint8_t segdw, > + const uint32_t flags) > +{ > + union nix_send_hdr_w0_u sh; > + union nix_send_sg_s sg; > + > + if (m->nb_segs == 1) { > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + sg.u |= (cnxk_nix_prefree_seg(m) << 55); > + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); > + } > + > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + if (!(sg.u & (1ULL << 55))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + return; > + } > + > + sh.u = vgetq_lane_u64(cmd0[0], 0); > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + > + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); > + > + sh.sizem1 = segdw - 1; > + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); > + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); > +} > + > #define NIX_DESCS_PER_LOOP 4 > + > +static __rte_always_inline uint8_t > +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, > + uint64x2_t *cmd1, uint64x2_t *cmd2, > + uint64x2_t *cmd3, uint8_t *segdw, > + uint64_t *lmt_addr, __uint128_t *data128, > + uint8_t *shift, const uint16_t flags) > +{ > + uint8_t j, off, lmt_used; > + > + if (!(flags & NIX_TX_NEED_EXT_HDR) && > + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { > + /* No segments in 4 consecutive packets. 
*/ > + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { > + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) > + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[0]); > + vst1q_u64(lmt_addr + 2, cmd1[0]); > + vst1q_u64(lmt_addr + 4, cmd0[1]); > + vst1q_u64(lmt_addr + 6, cmd1[1]); > + vst1q_u64(lmt_addr + 8, cmd0[2]); > + vst1q_u64(lmt_addr + 10, cmd1[2]); > + vst1q_u64(lmt_addr + 12, cmd0[3]); > + vst1q_u64(lmt_addr + 14, cmd1[3]); > + > + *data128 |= ((__uint128_t)7) << *shift; > + shift += 3; > + > + return 1; > + } > + } > + > + lmt_used = 0; > + for (j = 0; j < NIX_DESCS_PER_LOOP;) { > + /* Fit consecutive packets in same LMTLINE. */ > + if ((segdw[j] + segdw[j + 1]) <= 8) { > + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { > + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL, > + &cmd0[j + 1], > + &cmd1[j + 1], > + segdw[j + 1], flags); > + /* TSTAMP takes 4 each, no segs. */ > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + vst1q_u64(lmt_addr + 6, cmd3[j]); > + > + vst1q_u64(lmt_addr + 8, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 10, cmd2[j + 1]); > + vst1q_u64(lmt_addr + 12, cmd1[j + 1]); > + vst1q_u64(lmt_addr + 14, cmd3[j + 1]); > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > + /* EXT header take 3 each, space for 2 segs.*/ > + cn10k_nix_prepare_mseg_vec(mbufs[j], > + lmt_addr + 6, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + off = segdw[j] - 3; > + off <<= 1; > + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], > + lmt_addr + 12 + off, > + &cmd0[j + 1], > + &cmd1[j + 1], > + segdw[j + 1], flags); > + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); > + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); > + } else { > + cn10k_nix_prepare_mseg_vec(mbufs[j], > + lmt_addr + 4, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd1[j]); > + off = segdw[j] - 2; > + off <<= 1; > + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], > + lmt_addr + 8 + off, > + &cmd0[j + 1], > + &cmd1[j + 1], > + segdw[j + 1], flags); > + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); > + } > + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1) > + << *shift; > + *shift += 3; > + j += 2; > + } else { > + if ((flags & NIX_TX_NEED_EXT_HDR) && > + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { > + cn10k_nix_prepare_mseg_vec(mbufs[j], > + lmt_addr + 6, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + off = segdw[j] - 4; > + off <<= 1; > + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > + cn10k_nix_prepare_mseg_vec(mbufs[j], > + lmt_addr + 6, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + } else { > + cn10k_nix_prepare_mseg_vec(mbufs[j], > + lmt_addr + 4, > + &cmd0[j], &cmd1[j], > + segdw[j], flags); > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd1[j]); > + } > + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift; > + *shift += 3; > + j++; > + } > + lmt_used++; > + lmt_addr += 16; > + } > + > + 
return lmt_used; > +} > + > static __rte_always_inline uint16_t > cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint16_t pkts, uint64_t *cmd, const uint16_t flags) > @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; > uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], > cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; > - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; > + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa; > uint64x2_t senddesc01_w0, senddesc23_w0; > uint64x2_t senddesc01_w1, senddesc23_w1; > uint16_t left, scalar, burst, i, lmt_id; > @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint64x2_t sendext01_w1, sendext23_w1; > uint64x2_t sendmem01_w0, sendmem23_w0; > uint64x2_t sendmem01_w1, sendmem23_w1; > + uint8_t segdw[NIX_DESCS_PER_LOOP + 1]; > uint64x2_t sgdesc01_w0, sgdesc23_w0; > uint64x2_t sgdesc01_w1, sgdesc23_w1; > struct cn10k_eth_txq *txq = tx_queue; > @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint64x2_t ltypes01, ltypes23; > uint64x2_t xtmp128, ytmp128; > uint64x2_t xmask01, xmask23; > - uint8_t lnum; > + uint8_t lnum, shift; > + union wdata { > + __uint128_t data128; > + uint64_t data[2]; > + } wd; > > NIX_XMIT_FC_OR_RETURN(txq, pkts); > > @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > burst = left > cn10k_nix_pkts_per_vec_brst(flags) ? > cn10k_nix_pkts_per_vec_brst(flags) : > left; > + if (flags & NIX_TX_MULTI_SEG_F) { > + wd.data128 = 0; > + shift = 16; > + } > lnum = 0; > + > for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) { > + if (flags & NIX_TX_MULTI_SEG_F) { > + struct rte_mbuf *m = tx_pkts[j]; > + uint8_t j; [Nithin] I guess it moved out of below loop while rebasing. With this fixed, Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > + > + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { > + /* Get dwords based on nb_segs. */ > + segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs); > + /* Add dwords based on offloads. 
*/ > + segdw[j] += 1 + /* SEND HDR */ > + !!(flags & NIX_TX_NEED_EXT_HDR) + > + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); > + } > + > + /* Check if there are enough LMTLINES for this loop */ > + if (lnum + 4 > 32) { > + uint8_t ldwords_con = 0, lneeded = 0; > + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { > + ldwords_con += segdw[j]; > + if (ldwords_con > 8) { > + lneeded += 1; > + ldwords_con = segdw[j]; > + } > + } > + lneeded += 1; > + if (lnum + lneeded > 32) { > + burst = i; > + break; > + } > + } > + } > /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ > senddesc01_w0 = > vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF)); > @@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > sendext23_w0 = vld1q_u64(sx_w0 + 2); > } > > - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { > + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && > + !(flags & NIX_TX_MULTI_SEG_F)) { > /* Set don't free bit if reference count > 1 */ > xmask01 = vdupq_n_u64(0); > xmask23 = xmask01; > @@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > (void **)&mbuf3, 1, 0); > senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); > senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); > - } else { > + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { > /* Move mbufs to iova */ > mbuf0 = (uint64_t *)tx_pkts[0]; > mbuf1 = (uint64_t *)tx_pkts[1]; > @@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); > } > > - if (flags & NIX_TX_NEED_EXT_HDR) { > + if (flags & NIX_TX_MULTI_SEG_F) { > + uint8_t j; > + > + segdw[4] = 8; > + j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1, > + cmd2, cmd3, segdw, > + (uint64_t *) > + LMT_OFF(laddr, lnum, > + 0), > + &wd.data128, &shift, > + flags); > + lnum += j; > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > /* Store the prepared send desc to LMT lines */ > if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { > vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); > @@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; > } > > + if (flags & NIX_TX_MULTI_SEG_F) > + wd.data[0] >>= 16; > + > /* Trigger LMTST */ > if (lnum > 16) { > - data = cn10k_nix_tx_steor_vec_data(flags); > - pa = io_addr | (data & 0x7) << 4; > - data &= ~0x7ULL; > - data |= (15ULL << 12); > - data |= (uint64_t)lmt_id; > + if (!(flags & NIX_TX_MULTI_SEG_F)) > + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); > + > + pa = io_addr | (wd.data[0] & 0x7) << 4; > + wd.data[0] &= ~0x7ULL; > + > + if (flags & NIX_TX_MULTI_SEG_F) > + wd.data[0] <<= 16; > + > + wd.data[0] |= (15ULL << 12); > + wd.data[0] |= (uint64_t)lmt_id; > > /* STEOR0 */ > - roc_lmt_submit_steorl(data, pa); > + roc_lmt_submit_steorl(wd.data[0], pa); > > - data = cn10k_nix_tx_steor_vec_data(flags); > - pa = io_addr | (data & 0x7) << 4; > - data &= ~0x7ULL; > - data |= ((uint64_t)(lnum - 17)) << 12; > - data |= (uint64_t)(lmt_id + 16); > + if (!(flags & NIX_TX_MULTI_SEG_F)) > + wd.data[1] = cn10k_nix_tx_steor_vec_data(flags); > + > + pa = io_addr | (wd.data[1] & 0x7) << 4; > + wd.data[1] &= ~0x7ULL; > + > + if (flags & NIX_TX_MULTI_SEG_F) > + wd.data[1] <<= 16; > + > + wd.data[1] |= ((uint64_t)(lnum - 17)) << 12; > + wd.data[1] |= (uint64_t)(lmt_id + 16); > > /* STEOR1 */ > - roc_lmt_submit_steorl(data, pa); > + roc_lmt_submit_steorl(wd.data[1], pa); > } else if (lnum) { > - data = cn10k_nix_tx_steor_vec_data(flags); > - pa = io_addr | (data & 0x7) << 4; > - 
data &= ~0x7ULL; > - data |= ((uint64_t)(lnum - 1)) << 12; > - data |= lmt_id; > + if (!(flags & NIX_TX_MULTI_SEG_F)) > + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); > + > + pa = io_addr | (wd.data[0] & 0x7) << 4; > + wd.data[0] &= ~0x7ULL; > + > + if (flags & NIX_TX_MULTI_SEG_F) > + wd.data[0] <<= 16; > + > + wd.data[0] |= ((uint64_t)(lnum - 1)) << 12; > + wd.data[0] |= lmt_id; > > /* STEOR0 */ > - roc_lmt_submit_steorl(data, pa); > + roc_lmt_submit_steorl(wd.data[0], pa); > } > > left -= burst; > @@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > if (left) > goto again; > > - if (unlikely(scalar)) > - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd, > - flags); > + if (unlikely(scalar)) { > + if (flags & NIX_TX_MULTI_SEG_F) > + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, > + scalar, cmd, flags); > + else > + pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, > + cmd, flags); > + } > > return pkts; > } > @@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ > void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \ > - void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); > + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ > + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ > > NIX_TX_FASTPATH_MODES > #undef T > diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > new file mode 100644 > index 000000000..1fad81dba > --- /dev/null > +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. 
> + */ > + > +#include "cn10k_ethdev.h" > +#include "cn10k_tx.h" > + > +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ > + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ > + { \ > + uint64_t cmd[sz]; \ > + \ > + /* For TSO inner checksum is a must */ \ > + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ > + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > + return 0; \ > + return cn10k_nix_xmit_pkts_vector( \ > + tx_queue, tx_pkts, pkts, cmd, \ > + (flags) | NIX_TX_MULTI_SEG_F); \ > + } > + > +NIX_TX_FASTPATH_MODES > +#undef T > diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c > index 735e21cc6..763f9a14f 100644 > --- a/drivers/net/cnxk/cn9k_tx.c > +++ b/drivers/net/cnxk/cn9k_tx.c > @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) > #undef T > }; > > - if (dev->scalar_ena) > + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name, > + > + NIX_TX_FASTPATH_MODES > +#undef T > + }; > + > + if (dev->scalar_ena) { > pick_tx_func(eth_dev, nix_eth_tx_burst); > - else > + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); > + } else { > pick_tx_func(eth_dev, nix_eth_tx_vec_burst); > - > - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); > + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) > + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); > + } > > rte_mb(); > } > diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h > index dca732a9f..ed65cd351 100644 > --- a/drivers/net/cnxk/cn9k_tx.h > +++ b/drivers/net/cnxk/cn9k_tx.h > @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, > } > } > > +static __rte_always_inline uint8_t > +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, > + union nix_send_hdr_w0_u *sh, > + union nix_send_sg_s *sg, const uint32_t flags) > +{ > + struct rte_mbuf *m_next; > + uint64_t *slist, sg_u; > + uint16_t nb_segs; > + uint64_t segdw; > + int i = 1; > + > + sh->total = m->pkt_len; > + /* Clear sg->u header before use */ > + sg->u &= 0xFC00000000000000; > + sg_u = sg->u; > + slist = &cmd[0]; > + > + sg_u = sg_u | ((uint64_t)m->data_len); > + > + nb_segs = m->nb_segs - 1; > + m_next = m->next; > + > + /* Set invert df if buffer is not to be freed by H/W */ > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) > + sg_u |= (cnxk_nix_prefree_seg(m) << 55); > + /* Mark mempool object as "put" since it is freed by NIX */ > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + if (!(sg_u & (1ULL << 55))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + > + m = m_next; > + /* Fill mbuf segments */ > + do { > + m_next = m->next; > + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); > + *slist = rte_mbuf_data_iova(m); > + /* Set invert df if buffer is not to be freed by H/W */ > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) > + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); > + /* Mark mempool object as "put" since it is freed by NIX > + */ > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + if (!(sg_u & (1ULL << (i + 55)))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + slist++; > + i++; > + nb_segs--; > + if (i > 2 && nb_segs) { > + i = 0; > + /* Next SG subdesc */ > + *(uint64_t *)slist = sg_u & 
0xFC00000000000000; > + sg->u = sg_u; > + sg->segs = 3; > + sg = (union nix_send_sg_s *)slist; > + sg_u = sg->u; > + slist++; > + } > + m = m_next; > + } while (nb_segs); > + > + sg->u = sg_u; > + sg->segs = i; > + segdw = (uint64_t *)slist - (uint64_t *)&cmd[0]; > + > + segdw += 2; > + /* Roundup extra dwords to multiple of 2 */ > + segdw = (segdw >> 1) + (segdw & 0x1); > + /* Default dwords */ > + segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) + > + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); > + sh->sizem1 = segdw - 1; > + > + return segdw; > +} > + > +static __rte_always_inline uint8_t > +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, > + uint64x2_t *cmd1, const uint32_t flags) > +{ > + union nix_send_hdr_w0_u sh; > + union nix_send_sg_s sg; > + uint8_t ret; > + > + if (m->nb_segs == 1) { > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + sg.u |= (cnxk_nix_prefree_seg(m) << 55); > + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); > + } > + > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + if (!(sg.u & (1ULL << 55))) > + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > + rte_io_wmb(); > +#endif > + return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) + > + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); > + } > + > + sh.u = vgetq_lane_u64(cmd0[0], 0); > + sg.u = vgetq_lane_u64(cmd1[0], 0); > + > + ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); > + > + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); > + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); > + return ret; > +} > + > #define NIX_DESCS_PER_LOOP 4 > + > +static __rte_always_inline void > +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1, > + uint64x2_t *cmd2, uint64x2_t *cmd3, > + uint8_t *segdw, > + uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2], > + uint64_t *lmt_addr, rte_iova_t io_addr, > + const uint32_t flags) > +{ > + uint64_t lmt_status; > + uint8_t j, off; > + > + if (!(flags & NIX_TX_NEED_EXT_HDR) && > + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { > + /* No segments in 4 consecutive packets. */ > + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { > + do { > + vst1q_u64(lmt_addr, cmd0[0]); > + vst1q_u64(lmt_addr + 2, cmd1[0]); > + vst1q_u64(lmt_addr + 4, cmd0[1]); > + vst1q_u64(lmt_addr + 6, cmd1[1]); > + vst1q_u64(lmt_addr + 8, cmd0[2]); > + vst1q_u64(lmt_addr + 10, cmd1[2]); > + vst1q_u64(lmt_addr + 12, cmd0[3]); > + vst1q_u64(lmt_addr + 14, cmd1[3]); > + lmt_status = roc_lmt_submit_ldeor(io_addr); > + } while (lmt_status == 0); > + > + return; > + } > + } > + > + for (j = 0; j < NIX_DESCS_PER_LOOP;) { > + /* Fit consecutive packets in same LMTLINE. 
*/ > + if ((segdw[j] + segdw[j + 1]) <= 8) { > +again0: > + if ((flags & NIX_TX_NEED_EXT_HDR) && > + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 4; > + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); > + off <<= 1; > + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); > + > + vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]); > + vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]); > + roc_lmt_mov_seg(lmt_addr + 14 + off, > + slist[j + 1], segdw[j + 1] - 4); > + off += ((segdw[j + 1] - 4) << 1); > + vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]); > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 3; > + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); > + off <<= 1; > + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); > + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); > + roc_lmt_mov_seg(lmt_addr + 12 + off, > + slist[j + 1], segdw[j + 1] - 3); > + } else { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 2; > + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); > + off <<= 1; > + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); > + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); > + roc_lmt_mov_seg(lmt_addr + 8 + off, > + slist[j + 1], segdw[j + 1] - 2); > + } > + lmt_status = roc_lmt_submit_ldeor(io_addr); > + if (lmt_status == 0) > + goto again0; > + j += 2; > + } else { > +again1: > + if ((flags & NIX_TX_NEED_EXT_HDR) && > + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 4; > + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); > + off <<= 1; > + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd2[j]); > + vst1q_u64(lmt_addr + 4, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 3; > + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); > + } else { > + vst1q_u64(lmt_addr, cmd0[j]); > + vst1q_u64(lmt_addr + 2, cmd1[j]); > + /* Copy segs */ > + off = segdw[j] - 2; > + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); > + } > + lmt_status = roc_lmt_submit_ldeor(io_addr); > + if (lmt_status == 0) > + goto again1; > + j += 1; > + } > + } > +} > + > static __rte_always_inline uint16_t > cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint16_t pkts, uint64_t *cmd, const uint16_t flags) > @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > sendext23_w0 = vld1q_u64(sx_w0 + 2); > } > > - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { > + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && > + !(flags & NIX_TX_MULTI_SEG_F)) { > /* Set don't free bit if reference count > 1 */ > xmask01 = vdupq_n_u64(0); > xmask23 = xmask01; > @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > * cnxk_nix_prefree_seg are written before LMTST. 
> */ > rte_io_wmb(); > - } else { > + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { > /* Move mbufs to iova */ > mbuf0 = (uint64_t *)tx_pkts[0]; > mbuf1 = (uint64_t *)tx_pkts[1]; > @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); > } > > - if (flags & NIX_TX_NEED_EXT_HDR) { > + if (flags & NIX_TX_MULTI_SEG_F) { > + uint64_t seg_list[NIX_DESCS_PER_LOOP] > + [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; > + uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; > + > + /* Build mseg list for each packet individually. */ > + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) > + segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j], > + seg_list[j], &cmd0[j], > + &cmd1[j], flags); > + segdw[4] = 8; > + > + /* Commit all changes to mbuf before LMTST. */ > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) > + rte_io_wmb(); > + > + cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, > + segdw, seg_list, > + lmt_addr, io_addr, > + flags); > + } else if (flags & NIX_TX_NEED_EXT_HDR) { > /* With ext header in the command we can no longer send > * all 4 packets together since LMTLINE is 128bytes. > * Split and Tx twice. > @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; > } > > - if (unlikely(pkts_left)) > - pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, > - flags); > + if (unlikely(pkts_left)) { > + if (flags & NIX_TX_MULTI_SEG_F) > + pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, > + pkts_left, cmd, flags); > + else > + pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, > + cmd, flags); > + } > > return pkts; > } > @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ > void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ > + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ > void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c > new file mode 100644 > index 000000000..0256efd45 > --- /dev/null > +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. 
> + */ > + > +#include "cn9k_ethdev.h" > +#include "cn9k_tx.h" > + > +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ > + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ > + { \ > + uint64_t cmd[sz]; \ > + \ > + /* For TSO inner checksum is a must */ \ > + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ > + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > + return 0; \ > + return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ > + (flags) | \ > + NIX_TX_MULTI_SEG_F); \ > + } > + > +NIX_TX_FASTPATH_MODES > +#undef T > diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build > index aa8c7253f..361f7ce84 100644 > --- a/drivers/net/cnxk/meson.build > +++ b/drivers/net/cnxk/meson.build > @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c', > 'cn9k_rx_vec_mseg.c', > 'cn9k_tx.c', > 'cn9k_tx_mseg.c', > - 'cn9k_tx_vec.c') > + 'cn9k_tx_vec.c', > + 'cn9k_tx_vec_mseg.c') > # CN10K > sources += files('cn10k_ethdev.c', > 'cn10k_rte_flow.c', > @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c', > 'cn10k_rx_vec_mseg.c', > 'cn10k_tx.c', > 'cn10k_tx_mseg.c', > - 'cn10k_tx_vec.c') > + 'cn10k_tx_vec.c', > + 'cn10k_tx_vec_mseg.c') > > deps += ['bus_pci', 'cryptodev', 'eventdev', 'security'] > deps += ['common_cnxk', 'mempool_cnxk'] > -- > 2.17.1 >
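A note on the template mechanism the quoted Tx patch and the rest of this series lean on: NIX_TX_FASTPATH_MODES / NIX_RX_FASTPATH_MODES are x-macro lists, each T()/R() expansion stamps out one burst function specialized for a fixed offload-flag combination, and pick_tx_func()/pick_rx_func() index a [2][2][2][2][2][2] function table with !!(flags & F) per dimension. The sketch below is a minimal, self-contained model of that pattern, not driver code: the OFF_* flags, the xmit_* names and the two-dimensional table are invented for illustration (the real list has six flag dimensions).

#include <stdio.h>
#include <stdint.h>

/* Hypothetical offload flags standing in for the NIX_TX_OFFLOAD_* bits. */
#define OFF_CSUM (1U << 0)
#define OFF_MSEG (1U << 1)

/* X-macro list of (name, f1, f0, flags) modes, mirroring the shape of
 * NIX_TX_FASTPATH_MODES.
 */
#define FASTPATH_MODES                                                  \
	T(no_offload, 0, 0, 0)                                          \
	T(csum,       0, 1, OFF_CSUM)                                   \
	T(mseg,       1, 0, OFF_MSEG)                                   \
	T(mseg_csum,  1, 1, OFF_MSEG | OFF_CSUM)

/* Stamp out one specialized burst function per mode; `flags` is a
 * compile-time constant inside each body, so the compiler drops every
 * untaken offload branch.
 */
#define T(name, f1, f0, flags)                                          \
	static uint16_t xmit_##name(uint16_t pkts)                      \
	{                                                               \
		printf("xmit_" #name " flags=0x%x pkts=%u\n",           \
		       (unsigned)(flags), (unsigned)pkts);              \
		return pkts;                                            \
	}
FASTPATH_MODES
#undef T

typedef uint16_t (*tx_burst_t)(uint16_t pkts);

int main(void)
{
	/* Table indexed with !!(flag) per dimension, as pick_tx_func() does. */
	const tx_burst_t tbl[2][2] = {
#define T(name, f1, f0, flags) [f1][f0] = xmit_##name,
		FASTPATH_MODES
#undef T
	};
	uint32_t dev_flags = OFF_MSEG | OFF_CSUM;

	tbl[!!(dev_flags & OFF_MSEG)][!!(dev_flags & OFF_CSUM)](4);
	return 0;
}

This is also why each new fastpath variant (vec_mseg here) needs both a new expansion unit (cn9k_tx_vec_mseg.c, cn10k_tx_vec_mseg.c) and a new lookup table in the set_*_function() selector.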
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add a multi-segment Rx vector routine: form the primary mbufs using the vector path, then switch to the scalar path when extracting segments. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> --- v5 Changes: - Fix incorrect mbuf assignment. v4 Changes: - Split patches for easier merge. - Rebase on dpdk-next-net-mrvl. v3 Changes: - Spell check. drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ drivers/net/cnxk/meson.build | 2 ++ 7 files changed, 157 insertions(+), 44 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 5c956c06b..3a9fd7130 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 1cc37cbaa..5926ff7f4 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c new file mode 100644 index 000000000..04d1e46c8 --- /dev/null +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index 0acedd0a1..d293d4eac 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; + + rte_atomic_thread_fence(__ATOMIC_RELEASE); } void @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) #undef R }; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) - pick_rx_func(eth_dev, nix_eth_rx_burst); - else - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + NIX_RX_FASTPATH_MODES +#undef R + }; /* Copy multi seg version with no offload for tear down sequence */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - rte_mb(); + + /* For PTP enabled, scalar rx function should be chosen as most of the + * PTP apps are implemented to rx burst 1 pkt. 
+ */ + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_burst); + } + + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); } diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 10ef5c690..5ae9e8195 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - mbuf->nb_segs = nb_segs; + + if (nb_segs == 1) { + mbuf->next = NULL; + return; + } + + mbuf->pkt_len = rx->pkt_lenm1 + 1; mbuf->data_len = sg & 0xFFFF; + mbuf->nb_segs = nb_segs; sg = sg >> 16; eol = ((const rte_iova_t *)(rx + 1) + @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); mbuf->ol_flags = ol_flags; - *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; - if (flag & NIX_RX_MULTI_SEG_F) { + if (flag & NIX_RX_MULTI_SEG_F) nix_cqe_xtract_mseg(rx, mbuf, val); - } else { - mbuf->data_len = len; + else mbuf->next = NULL; - } } static inline uint16_t @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; - /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + if (flags & NIX_RX_MULTI_SEG_F) { + /* Multi segment is enable build mseg list for + * individual mbufs in scalar mode. + */ + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer); + nix_cqe_xtract_mseg((union nix_rx_parse_u *) + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer); + } else { + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + } + /* Prefetch mbufs */ roc_prefetch_store_keep(mbuf0); roc_prefetch_store_keep(mbuf1); @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c new file mode 100644 index 000000000..e46d8a474 --- /dev/null +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ + { \ + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ + (flags) | \ + NIX_RX_MULTI_SEG_F); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index 2071d0dcb..aa8c7253f 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx.c', 'cn9k_rx_mseg.c', 'cn9k_rx_vec.c', + 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', 'cn9k_tx_vec.c') @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx.c', 'cn10k_rx_mseg.c', 'cn10k_rx_vec.c', + 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', 'cn10k_tx_vec.c') -- 2.17.1
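For readers not steeped in the NIX CQE layout, the scalar walk that nix_cqe_xtract_mseg() performs after the vector pass decodes one SG word: the low three 16-bit fields carry per-segment lengths, bits 48:49 carry the segment count, and the buffer addresses follow as an iova list. Below is a simplified, runnable model of that walk under those assumptions; struct seg is an invented stand-in for rte_mbuf, and a real SG list can span multiple SG words (the eol handling), which this sketch ignores.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct seg {
	uint16_t data_len;
	uint64_t iova;
	struct seg *next;
};

/* Decode one SG word plus its iova list into a chained segment list. */
static void xtract_mseg(uint64_t sg, const uint64_t *iova_list,
			struct seg *segs /* at least 3 entries */)
{
	uint16_t nb_segs = (sg >> 48) & 0x3; /* up to 3 segs per SG word */
	struct seg *cur = &segs[0];
	int i;

	/* Head length is the low 16 bits; the head buffer's address comes
	 * from the primary mbuf, not from the iova list.
	 */
	cur->data_len = sg & 0xFFFF;
	sg >>= 16;
	for (i = 1; i < nb_segs; i++) {
		cur->next = &segs[i];
		cur = cur->next;
		cur->data_len = sg & 0xFFFF;  /* next 16-bit length field */
		cur->iova = iova_list[i - 1]; /* trailing buffer address */
		sg >>= 16;
	}
	cur->next = NULL; /* terminate the chain */
}

int main(void)
{
	struct seg segs[3] = { {0, 0, NULL} };
	uint64_t iovas[2] = {0x1000, 0x2000};
	/* SG word: 3 segments with lengths 128, 256 and 64. */
	uint64_t sg = (3ULL << 48) | (64ULL << 32) | (256ULL << 16) | 128;

	xtract_mseg(sg, iovas, segs);
	for (struct seg *s = segs; s != NULL; s = s->next)
		printf("len=%u iova=0x%llx\n", (unsigned)s->data_len,
		       (unsigned long long)s->iova);
	return 0;
}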
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in the vector Rx burst function: use the vector path for processing mbufs and switch to the scalar path only when extracting the timestamp. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_ethdev.c | 1 - drivers/net/cnxk/cn10k_rx.c | 5 +- drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_rx_vec.c | 3 - drivers/net/cnxk/cn9k_ethdev.c | 1 - drivers/net/cnxk/cn9k_rx.c | 5 +- drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_rx_vec.c | 3 - drivers/net/cnxk/cnxk_ethdev.h | 19 ++--- 9 files changed, 232 insertions(+), 53 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index b079edbd3..7caec6cf1 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c index 3a9fd7130..69e767ac3 100644 --- a/drivers/net/cnxk/cn10k_rx.c +++ b/drivers/net/cnxk/cn10k_rx.c @@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 5926ff7f4..d9572b19e 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 65ffa9784..93528a44f 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -11,9 +11,6 @@ struct rte_mbuf **rx_pkts, \ uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 994fdb7c3..115e67891 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev) if (nix_recalc_mtu(eth_dev)) plt_err("Failed to set MTU size for ptp"); - dev->scalar_ena = true; dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F; /* Setting up the function pointers as per new offload flags */ diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c index d293d4eac..7d9f1bd61 100644 --- a/drivers/net/cnxk/cn9k_rx.c +++ b/drivers/net/cnxk/cn9k_rx.c @@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) dev->rx_pkt_burst_no_offload = nix_eth_rx_burst_mseg[0][0][0][0][0][0]; - /* For PTP enabled, scalar rx function should be chosen as most of the - * PTP apps are implemented to rx burst 1 pkt. - */ - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + if (dev->scalar_ena) { if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); return pick_rx_func(eth_dev, nix_eth_rx_burst); diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h index 5ae9e8195..beb52f39d 100644 --- a/drivers/net/cnxk/cn9k_rx.h +++ b/drivers/net/cnxk/cn9k_rx.h @@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, static __rte_always_inline void nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, - uint64_t rearm) + uint64_t rearm, const uint16_t flags) { const rte_iova_t *iova_list; struct rte_mbuf *head; @@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, return; } - mbuf->pkt_len = rx->pkt_lenm1 + 1; - mbuf->data_len = sg & 0xFFFF; + mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? + CNXK_NIX_TIMESYNC_RX_OFFSET : 0); + mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0); mbuf->nb_segs = nb_segs; sg = sg >> 16; @@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; if (flag & NIX_RX_MULTI_SEG_F) - nix_cqe_xtract_mseg(rx, mbuf, val); + nix_cqe_xtract_mseg(rx, mbuf, val, flag); else mbuf->next = NULL; } @@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, flags); cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F), - (uint64_t *)((uint8_t *)mbuf + data_off) - ); + (flags & NIX_RX_MULTI_SEG_F), + (uint64_t *)((uint8_t *)mbuf + + data_off)); rx_pkts[packets++] = mbuf; roc_prefetch_store_keep(mbuf); head++; @@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, mbuf3); } + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { + const uint16x8_t len_off = { + 0, /* ptype 0:15 */ + 0, /* ptype 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/ + 0, /* pktlen 16:32 */ + CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */ + 0, + 0, + 0}; + const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC, + RTE_PTYPE_L2_ETHER_TIMESYNC}; + const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | + PKT_RX_IEEE1588_TMST | + rxq->tstamp->rx_tstamp_dynflag; + const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; + uint64x2_t ts01, ts23, mask; + uint64_t ts[4]; + uint8_t res; + + /* Subtract timesync length from total pkt length. */ + f0 = vsubq_u16(f0, len_off); + f1 = vsubq_u16(f1, len_off); + f2 = vsubq_u16(f2, len_off); + f3 = vsubq_u16(f3, len_off); + + /* Get the address of actual timestamp. */ + ts01 = vaddq_u64(mbuf01, data_off); + ts23 = vaddq_u64(mbuf23, data_off); + /* Load timestamp from address. */ + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 0), + ts01, 0); + ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, + 1), + ts01, 1); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 0), + ts23, 0); + ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, + 1), + ts23, 1); + /* Convert from be to cpu byteorder. */ + ts01 = vrev64q_u8(ts01); + ts23 = vrev64q_u8(ts23); + /* Store timestamp into scalar for later use. */ + ts[0] = vgetq_lane_u64(ts01, 0); + ts[1] = vgetq_lane_u64(ts01, 1); + ts[2] = vgetq_lane_u64(ts23, 0); + ts[3] = vgetq_lane_u64(ts23, 1); + + /* Store timestamp into dynfield. */ + *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = + ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = + ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = + ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = + ts[3]; + + /* Generate ptype mask to filter L2 ether timesync */ + mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); + mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1); + mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2); + mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3); + + /* Match against L2 ether timesync. */ + mask = vceqq_u32(mask, ptype); + /* Convert from vector from scalar mask */ + res = vaddvq_u32(vandq_u32(mask, and_mask)); + res &= 0xF; + + if (res) { + /* Fill in the ol_flags for any packets that + * matched. + */ + ol_flags0 |= ((res & 0x1) ? ts_olf : 0); + ol_flags1 |= ((res & 0x2) ? ts_olf : 0); + ol_flags2 |= ((res & 0x4) ? ts_olf : 0); + ol_flags3 |= ((res & 0x8) ? ts_olf : 0); + + /* Update Rxq timestamp with the latest + * timestamp. 
+ */ + rxq->tstamp->rx_ready = 1; + rxq->tstamp->rx_tstamp = + ts[31 - __builtin_clz(res)]; + } + } + /* Form rearm_data with ol_flags */ rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1); rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1); @@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, * individual mbufs in scalar mode. */ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer); + (cq0 + CQE_SZ(0) + 8), mbuf0, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer); + (cq0 + CQE_SZ(1) + 8), mbuf1, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer); + (cq0 + CQE_SZ(2) + 8), mbuf2, + mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer); + (cq0 + CQE_SZ(3) + 8), mbuf3, + mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c index e61c2225c..ef5f771ef 100644 --- a/drivers/net/cnxk/cn9k_rx_vec.c +++ b/drivers/net/cnxk/cn9k_rx_vec.c @@ -9,9 +9,6 @@ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - /* TSTMP is not supported by vector */ \ - if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \ - return 0; \ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ (flags)); \ } diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 67b1f4253..4eead0390 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -136,13 +136,12 @@ struct cnxk_eth_qconf { }; struct cnxk_timesync_info { + uint8_t rx_ready; + uint64_t rx_tstamp; uint64_t rx_tstamp_dynflag; + int tstamp_dynfield_offset; rte_iova_t tx_tstamp_iova; uint64_t *tx_tstamp; - uint64_t rx_tstamp; - int tstamp_dynfield_offset; - uint8_t tx_ready; - uint8_t rx_ready; } __plt_cache_aligned; struct cnxk_eth_dev { @@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf, static __rte_always_inline void cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, - struct cnxk_timesync_info *tstamp, bool ts_enable, + struct cnxk_timesync_info *tstamp, + const uint8_t ts_enable, const uint8_t mseg_enable, uint64_t *tstamp_ptr) { - if (ts_enable && - (mbuf->data_off == - RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) { - mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + if (ts_enable) { + if (!mseg_enable) { + mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET; + } /* Reading the rx timestamp inserted by CGX, viz at * starting of the packet data. -- 2.17.1
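Two details in the vector PTP Rx path above are easy to miss: the CGX-inserted timestamp sits big-endian at the start of packet data, so the loaded lanes are byte-reversed (the vrev64q_u8() calls), and the Rx queue's latest timestamp is chosen from the ptype match bitmask as ts[31 - __builtin_clz(res)], i.e. the highest-numbered matching packet in the batch of four. A plain-C sketch of both, assuming a little-endian host and GCC/Clang builtins; the names are illustrative, not driver symbols:

#include <stdio.h>
#include <stdint.h>

/* Byte-swap a big-endian 64-bit timestamp read from the packet headroom;
 * this is what the vrev64q_u8() lanes accomplish in the NEON path.
 */
static uint64_t be64_to_cpu(uint64_t v)
{
	return __builtin_bswap64(v); /* assumes a little-endian host */
}

int main(void)
{
	/* ts[i] is packet i's timestamp; res has bit i set when packet i's
	 * ptype matched L2 ether timesync (the vceqq/vaddvq reduction).
	 */
	uint64_t ts[4] = {100, 0, 300, 0};
	uint8_t res = 0x5; /* packets 0 and 2 matched */

	if (res) {
		/* 31 - clz(res) is the index of the highest set bit, i.e.
		 * the most recently received matching packet in the batch.
		 */
		uint64_t latest = ts[31 - __builtin_clz(res)];
		printf("latest rx tstamp: %llu\n",
		       (unsigned long long)latest);
	}
	printf("swapped: 0x%llx\n",
	       (unsigned long long)be64_to_cpu(0x0102030405060708ULL));
	return 0;
}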
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable VLAN offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 3 +- drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++---- drivers/net/cnxk/cn10k_tx_vec.c | 3 +- drivers/net/cnxk/cn9k_tx.c | 3 +- drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 3 +- 6 files changed, 227 insertions(+), 38 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 18694dc70..05bc163a4 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8b1446f25..1e1697858 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_pkts_per_vec_brst(const uint16_t flags) { - RTE_SET_USED(flags); - /* We can pack up to 4 packets per LMTLINE if there are no offloads. */ - return 4 << ROC_LMT_LINES_PER_CORE_LOG2; + return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4) + << ROC_LMT_LINES_PER_CORE_LOG2; +} + +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line(const uint16_t flags) +{ + return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; } static __rte_always_inline uint64_t @@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags) static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { - const uint64_t dw_m1 = 0x7; + const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1; uint64_t data; - RTE_SET_USED(flags); /* This will be moved to addr area */ data = dw_m1; /* 15 vector sizes for single seg */ @@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0); sgdesc23_w0 = sgdesc01_w0; + /* Load command defaults into vector variables. 
*/ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + } + /* Get LMT base address and LMT ID as lcore id */ ROC_LMT_BASE_ID_GET(laddr, lmt_id); left = pkts; @@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); - lnum += 1; + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + lnum += 1; + } else { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); + lnum += 1; + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 7453f3bc9..beb5c649b 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index b80260607..4b43cdaff 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 1899d6670..d5715bb52 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -552,10 +552,13 @@ 
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; senddesc01_w1 = vdupq_n_u64(0); senddesc23_w1 = senddesc01_w1; - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); - sgdesc23_w0 = sgdesc01_w0; + + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); + sgdesc23_w0 = sgdesc01_w0; + } else { + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sgdesc23_w0 = sgdesc01_w0; + } for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) { /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ @@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. 
*/ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd1[0]); - vst1q_u64(lmt_addr + 4, cmd0[1]); - vst1q_u64(lmt_addr + 6, cmd1[1]); - vst1q_u64(lmt_addr + 8, cmd0[2]); - vst1q_u64(lmt_addr + 10, cmd1[2]); - vst1q_u64(lmt_addr + 12, cmd0[3]); - vst1q_u64(lmt_addr + 14, cmd1[3]); - lmt_status = roc_lmt_submit_ldeor(io_addr); - } while (lmt_status == 0); + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* With ext header in the command we can no longer send + * all 4 packets together since LMTLINE is 128bytes. + * Split and Tx twice. + */ + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + do { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } else { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index a6e7c9e54..5842facb5 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ -- 2.17.1
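The VLAN descriptor packing above is compact enough to restate in scalar form: the outer TCI lands in the send-ext VLAN0 field (bits 8:23, insert-enable bit 48), the inner TCI in VLAN1 (bits 32:47, insert-enable bit 49), and the enable bits are set only for packets whose ol_flags request insertion, which is exactly what the vtstq/vandq/vorrq sequence computes four packets at a time. A minimal scalar sketch under those assumptions; TX_VLAN/TX_QINQ are illustrative values, not DPDK's PKT_TX_* flags:

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n) (1ULL << (n))

/* Illustrative stand-ins for the PKT_TX_VLAN/PKT_TX_QINQ ol_flags. */
#define TX_VLAN BIT_ULL(0)
#define TX_QINQ BIT_ULL(1)

/* Build the send-ext w1 word the way the vector path does: outer TCI in
 * VLAN0 (bits 8:23, valid bit 48), inner TCI in VLAN1 (bits 32:47, valid
 * bit 49), with the valid bits gated on the packet's offload flags.
 */
static uint64_t build_ext_w1(uint64_t ol_flags, uint16_t tci_outer,
			     uint16_t tci_inner)
{
	uint64_t w1 = ((uint64_t)tci_outer << 8) | ((uint64_t)tci_inner << 32);

	if (ol_flags & TX_QINQ)
		w1 |= BIT_ULL(48); /* VLAN0 (outer) insert enable */
	if (ol_flags & TX_VLAN)
		w1 |= BIT_ULL(49); /* VLAN1 (inner) insert enable */
	return w1;
}

int main(void)
{
	printf("w1=0x%016llx\n", (unsigned long long)
	       build_ext_w1(TX_VLAN | TX_QINQ, 0x0123, 0x0456));
	return 0;
}

The "outer is VLAN 0" comment in the patch reflects the same mapping: with QinQ both fields are populated and both enable bits are set.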
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable PTP offload in the vector Tx burst function. Since we can no longer use a single LMT line for a burst of 4, split the LMT line into two and transmit twice. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 4 +- drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++----- drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 4 +- drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++---- drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 192 insertions(+), 40 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 05bc163a4..c4c3e6570 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 1e1697858..8af6799ff 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_tx_dwords_per_line(const uint16_t flags) { - return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; + return (flags & NIX_TX_NEED_EXT_HDR) ? + ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : + 8; } static __rte_always_inline uint64_t @@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = sendext01_w0; sendext01_w1 = vdupq_n_u64(12 | 12U << 24); sendext23_w1 = sendext01_w1; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]); + sendmem23_w1 = sendmem01_w1; + } } /* Get LMT base address and LMT ID as lcore id */ @@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB*/ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default. 
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted * mask as we need not make any changes to default cmd * value. */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when * TSTMP is disabled. */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); - lnum += 1; - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]); + } else { + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + } lnum += 1; } else { /* Store the prepared send desc to LMT lines */ diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index beb5c649b..0b4a4c7ba 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 4b43cdaff..c32681ed4 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || - (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) + if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index d5715bb52..cb574a1c1 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], - cmd2[NIX_DESCS_PER_LOOP]; + cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint64x2_t sendext01_w0, sendext23_w0; uint64x2_t sendext01_w1, sendext23_w1; + uint64x2_t sendmem01_w0, sendmem23_w0; + uint64x2_t sendmem01_w1, sendmem23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); sgdesc23_w0 = sgdesc01_w0; + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]); + sendmem23_w0 = sendmem01_w0; + sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]); + sendmem23_w1 = sendmem01_w1; + } } else { sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); sgdesc23_w0 = sgdesc01_w0; @@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = sendext01_w1; } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Reset send mem alg to SETTSTMP from SUB. */ + sendmem01_w0 = vbicq_u64(sendmem01_w0, + vdupq_n_u64(BIT_ULL(59))); + /* Reset send mem address to default.
*/ + sendmem01_w1 = + vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF)); + sendmem23_w0 = sendmem01_w0; + sendmem23_w1 = sendmem01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); } + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + /* Tx ol_flag for timestamp. */ + const uint64x2_t olf = {PKT_TX_IEEE1588_TMST, + PKT_TX_IEEE1588_TMST}; + /* Set send mem alg to SUB. */ + const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; + /* Increment send mem address by 8. */ + const uint64x2_t addr = {0x8, 0x8}; + + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Check if timestamp is requested and generate inverted * mask as we need not make any changes to default cmd * value. */ + xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); + ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); + + /* Change send mem address to an 8 byte offset when * TSTMP is disabled. */ + sendmem01_w1 = vaddq_u64(sendmem01_w1, + vandq_u64(xtmp128, addr)); + sendmem23_w1 = vaddq_u64(sendmem23_w1, + vandq_u64(ytmp128, addr)); + /* Change send mem alg to SUB when TSTMP is disabled. */ + sendmem01_w0 = vorrq_u64(sendmem01_w0, + vandq_u64(xtmp128, alg)); + sendmem23_w0 = vorrq_u64(sendmem23_w0, + vandq_u64(ytmp128, alg)); + + cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); + cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); + cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); + cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * Split and Tx twice.
*/ do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd2[0]); - vst1q_u64(lmt_addr + 4, cmd1[0]); - vst1q_u64(lmt_addr + 6, cmd0[1]); - vst1q_u64(lmt_addr + 8, cmd2[1]); - vst1q_u64(lmt_addr + 10, cmd1[1]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd3[0]); + vst1q_u64(lmt_addr + 8, cmd0[1]); + vst1q_u64(lmt_addr + 10, cmd2[1]); + vst1q_u64(lmt_addr + 12, cmd1[1]); + vst1q_u64(lmt_addr + 14, cmd3[1]); + } else { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); do { - vst1q_u64(lmt_addr, cmd0[2]); - vst1q_u64(lmt_addr + 2, cmd2[2]); - vst1q_u64(lmt_addr + 4, cmd1[2]); - vst1q_u64(lmt_addr + 6, cmd0[3]); - vst1q_u64(lmt_addr + 8, cmd2[3]); - vst1q_u64(lmt_addr + 10, cmd1[3]); + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd3[2]); + vst1q_u64(lmt_addr + 8, cmd0[3]); + vst1q_u64(lmt_addr + 10, cmd2[3]); + vst1q_u64(lmt_addr + 12, cmd1[3]); + vst1q_u64(lmt_addr + 14, cmd3[3]); + } else { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + } lmt_status = roc_lmt_submit_ldeor(io_addr); } while (lmt_status == 0); } else { diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 5842facb5..9ade66db2 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,9 +13,8 @@ { \ uint64_t cmd[sz]; \ \ - /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ - (flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* TSO is not supported by vec */ \ + if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
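The timestamp path added above is branch-free: the SEND_MEM words are preloaded with the timestamp-enabled encoding, vtstq_u64() produces an all-ones lane wherever PKT_TX_IEEE1588_TMST is set, and the inverted mask is then used to flip the algorithm to SUB and move the write address for the lanes that did not request a timestamp, turning the hardware write into a no-op for them. A minimal standalone sketch of the same select pattern, assuming an arm64 toolchain with arm_neon.h; the flag bit and field positions below are illustrative only, not the real NIX descriptor layout:

#include <arm_neon.h>
#include <stdint.h>

/* Hypothetical stand-in for PKT_TX_IEEE1588_TMST. */
#define TSTAMP_FLAG (1ULL << 50)

/* For each 64-bit lane where the flag is NOT set, set bit 59 ("alg")
 * in w0 and advance the address held in w1 by 8, without branching.
 */
static inline void
neutralize_tstamp(uint64x2_t olflags, uint64x2_t *w0, uint64x2_t *w1)
{
	/* All-ones lane where the flag is set, all-zeros otherwise. */
	uint64x2_t set = vtstq_u64(vdupq_n_u64(TSTAMP_FLAG), olflags);
	/* Invert to select the lanes whose defaults must be undone. */
	uint64x2_t unset = vreinterpretq_u64_u32(
		vmvnq_u32(vreinterpretq_u32_u64(set)));

	*w0 = vorrq_u64(*w0, vandq_u64(unset, vdupq_n_u64(1ULL << 59)));
	*w1 = vaddq_u64(*w1, vandq_u64(unset, vdupq_n_u64(0x8)));
}

The explicit vreinterpretq casts only keep the sketch valid under strict vector-type checking; the driver applies vmvnq_u32() to the 64-bit vectors directly and relies on relaxed vector conversions.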
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enable TSO offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn10k_tx_vec.c | 5 +- drivers/net/cnxk/cn9k_tx.c | 2 +- drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn9k_tx_vec.c | 5 +- 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index c4c3e6570..d06879163 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8af6799ff..26797581e 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + const uint64_t flags, const uint64_t lso_tun_fmt) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + uint8_t shift = is_udp_tun ? 32 : 0; + + shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4); + shift += (!!(ol_flags & PKT_TX_IPV6) << 3); + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + + w0->lso_format = (lso_tun_fmt >> shift); + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0); senddesc23_w0 = senddesc01_w0; @@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. 
*/ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + const uint64_t lso_fmt = txq->lso_tun_fmt; + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. */ + cn10k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags, lso_fmt); + + cn10k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags, lso_fmt); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 0b4a4c7ba..34e373750 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ (flags)); \ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index c32681ed4..735e21cc6 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)) + if (dev->scalar_ena) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index cb574a1c1..dca732a9f 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, #if defined(RTE_ARCH_ARM64) +static __rte_always_inline void +cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, + union nix_send_ext_w0_u *w0, uint64_t ol_flags, + uint64_t flags) +{ + uint16_t lso_sb; + uint64_t mask; + + if (!(ol_flags & PKT_TX_TCP_SEG)) + return; + + mask = -(!w1->il3type); + lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len; + + w0->u |= BIT(14); + w0->lso_sb = lso_sb; + w0->lso_mps = m->tso_segsz; + 
w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6); + w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + + /* Handle tunnel tso */ + if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = + (CNXK_NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & + 0x1; + + w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ + w0->lso_format += is_udp_tun ? 2 : 6; + + w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1; + } +} + #define NIX_DESCS_PER_LOOP 4 static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { + for (i = 0; i < pkts; i++) + cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags); + } + /* Lets commit any changes in the packet here as no further changes * to the packet will be done unless no fast free is enabled. */ @@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendmem23_w1 = sendmem01_w1; } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + /* Clear the LSO enable bit. */ + sendext01_w0 = vbicq_u64(sendext01_w0, + vdupq_n_u64(BIT_ULL(14))); + sendext23_w0 = sendext01_w0; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); } + if (flags & NIX_TX_OFFLOAD_TSO_F) { + uint64_t sx_w0[NIX_DESCS_PER_LOOP]; + uint64_t sd_w1[NIX_DESCS_PER_LOOP]; + + /* Extract SD W1 as we need to set L4 types. */ + vst1q_u64(sd_w1, senddesc01_w1); + vst1q_u64(sd_w1 + 2, senddesc23_w1); + + /* Extract SX W0 as we need to set LSO fields. */ + vst1q_u64(sx_w0, sendext01_w0); + vst1q_u64(sx_w0 + 2, sendext23_w0); + + /* Extract ol_flags. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* Prepare individual mbufs. 
*/ + cn9k_nix_prepare_tso(tx_pkts[0], + (union nix_send_hdr_w1_u *)&sd_w1[0], + (union nix_send_ext_w0_u *)&sx_w0[0], + vgetq_lane_u64(xtmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[1], + (union nix_send_hdr_w1_u *)&sd_w1[1], + (union nix_send_ext_w0_u *)&sx_w0[1], + vgetq_lane_u64(xtmp128, 1), flags); + + cn9k_nix_prepare_tso(tx_pkts[2], + (union nix_send_hdr_w1_u *)&sd_w1[2], + (union nix_send_ext_w0_u *)&sx_w0[2], + vgetq_lane_u64(ytmp128, 0), flags); + + cn9k_nix_prepare_tso(tx_pkts[3], + (union nix_send_hdr_w1_u *)&sd_w1[3], + (union nix_send_ext_w0_u *)&sx_w0[3], + vgetq_lane_u64(ytmp128, 1), flags); + + senddesc01_w1 = vld1q_u64(sd_w1); + senddesc23_w1 = vld1q_u64(sd_w1 + 2); + + sendext01_w0 = vld1q_u64(sx_w0); + sendext23_w0 = vld1q_u64(sx_w0 + 2); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index 9ade66db2..56a3e2514 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -13,8 +13,9 @@ { \ uint64_t cmd[sz]; \ \ - /* TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_TSO_F) \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ (flags)); \ -- 2.17.1
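Both prepare_tso() helpers compute the LSO start byte with a branchless select: mask = -(!w1->il3type) is all-ones when no inner L3 header is present, so (mask & w1->ol4ptr) + (~mask & w1->il4ptr) picks the outer or the inner L4 pointer without a conditional. A small self-contained illustration of the idiom; the header offsets are made up for the example:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Branchless select of the LSO start byte: the mask is all-ones for
 * plain TCP (no inner L3 header) and all-zeros for the tunneled case.
 */
static uint64_t
lso_start_byte(uint64_t il3type, uint64_t ol4ptr, uint64_t il4ptr,
	       uint64_t l4_len)
{
	uint64_t mask = -(uint64_t)(il3type == 0);

	return (mask & ol4ptr) + (~mask & il4ptr) + l4_len;
}

int
main(void)
{
	/* Plain TCP: L4 header at byte 34, 20B TCP header -> 54. */
	printf("%" PRIu64 "\n", lso_start_byte(0, 34, 0, 20));
	/* Tunneled packet with the inner L4 at byte 84 -> 104. */
	printf("%" PRIu64 "\n", lso_start_byte(4, 34, 84, 20));
	return 0;
}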
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add multi segment Tx vector routine. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/net/cnxk/cn10k_tx.c | 20 +- drivers/net/cnxk/cn10k_tx.h | 389 +++++++++++++++++++++++++-- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/cn9k_tx.c | 20 +- drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++- drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++ drivers/net/cnxk/meson.build | 6 +- 7 files changed, 710 insertions(+), 45 deletions(-) create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index d06879163..1f30bab59 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 26797581e..eb148b8e7 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -42,6 +42,13 @@ } \ } while (0) +/* Encoded number of segments to number of dwords macro, each value of nb_segs + * is encoded as 4bits. + */ +#define NIX_SEGDW_MAGIC 0x76654432210ULL + +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF) + #define LMT_OFF(lmt_addr, lmt_num, offset) \ (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset)) @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags) return data; } +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags) +{ + return ((flags & NIX_TX_NEED_EXT_HDR) ? + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 
8 : 6 : + 4); +} + static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; +} + +static __rte_always_inline void +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint8_t segdw, + const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return; + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + sh.sizem1 = segdw - 1; + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline uint8_t +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, + uint64x2_t *cmd1, uint64x2_t *cmd2, + uint64x2_t *cmd3, uint8_t *segdw, + uint64_t *lmt_addr, __uint128_t *data128, + uint8_t *shift, const uint16_t flags) +{ + uint8_t j, off, lmt_used; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. 
*/ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + + *data128 |= ((__uint128_t)7) << *shift; + *shift += 3; + + return 1; + } + } + + lmt_used = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. */ + if ((segdw[j] + segdw[j + 1]) <= 8) { + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL, + &cmd0[j], &cmd1[j], + segdw[j], flags); + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + /* TSTAMP takes 4 each, no segs. */ + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + vst1q_u64(lmt_addr + 6, cmd3[j]); + + vst1q_u64(lmt_addr + 8, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12, cmd1[j + 1]); + vst1q_u64(lmt_addr + 14, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + /* EXT header takes 3 each, space for 2 segs. */ + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 3; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 12 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + off = segdw[j] - 2; + off <<= 1; + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], + lmt_addr + 8 + off, + &cmd0[j + 1], + &cmd1[j + 1], + segdw[j + 1], flags); + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + } + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1) + << *shift; + *shift += 3; + j += 2; + } else { + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + off = segdw[j] - 4; + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 6, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + } else { + cn10k_nix_prepare_mseg_vec(mbufs[j], + lmt_addr + 4, + &cmd0[j], &cmd1[j], + segdw[j], flags); + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + } + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift; + *shift += 3; + j++; + } + lmt_used++; + lmt_addr += 16; + } + + return lmt_used; +} + static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue,
struct rte_mbuf **tx_pkts, uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP]; - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sendmem01_w0, sendmem23_w0; uint64x2_t sendmem01_w1, sendmem23_w1; + uint8_t segdw[NIX_DESCS_PER_LOOP + 1]; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64x2_t ltypes01, ltypes23; uint64x2_t xtmp128, ytmp128; uint64x2_t xmask01, xmask23; - uint8_t lnum; + uint8_t lnum, shift; + union wdata { + __uint128_t data128; + uint64_t data[2]; + } wd; NIX_XMIT_FC_OR_RETURN(txq, pkts); @@ -798,8 +1055,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, burst = left > cn10k_nix_pkts_per_vec_brst(flags) ? cn10k_nix_pkts_per_vec_brst(flags) : left; + if (flags & NIX_TX_MULTI_SEG_F) { + wd.data128 = 0; + shift = 16; + } lnum = 0; + for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + struct rte_mbuf *m = tx_pkts[j]; + + /* Get dwords based on nb_segs. */ + segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs); + /* Add dwords based on offloads. */ + segdw[j] += 1 + /* SEND HDR */ + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + /* Check if there are enough LMTLINES for this loop */ + if (lnum + 4 > 32) { + uint8_t ldwords_con = 0, lneeded = 0; + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) { + ldwords_con += segdw[j]; + if (ldwords_con > 8) { + lneeded += 1; + ldwords_con = segdw[j]; + } + } + lneeded += 1; + if (lnum + lneeded > 32) { + burst = i; + break; + } + } + } /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ senddesc01_w0 = vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF)); @@ -1527,7 +1820,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1567,7 +1861,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, (void **)&mbuf3, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1612,7 +1906,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint8_t j; + + segdw[4] = 8; + j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1, + cmd2, cmd3, segdw, + (uint64_t *) + LMT_OFF(laddr, lnum, + 0), + &wd.data128, &shift, + flags); + lnum += j; + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* Store the prepared send desc to LMT lines */ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { 
vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); @@ -1664,34 +1970,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] >>= 16; + /* Trigger LMTST */ if (lnum > 16) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= (15ULL << 12); - data |= (uint64_t)lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= (15ULL << 12); + wd.data[0] |= (uint64_t)lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 17)) << 12; - data |= (uint64_t)(lmt_id + 16); + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[1] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[1] & 0x7) << 4; + wd.data[1] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[1] <<= 16; + + wd.data[1] |= ((uint64_t)(lnum - 17)) << 12; + wd.data[1] |= (uint64_t)(lmt_id + 16); /* STEOR1 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[1], pa); } else if (lnum) { - data = cn10k_nix_tx_steor_vec_data(flags); - pa = io_addr | (data & 0x7) << 4; - data &= ~0x7ULL; - data |= ((uint64_t)(lnum - 1)) << 12; - data |= lmt_id; + if (!(flags & NIX_TX_MULTI_SEG_F)) + wd.data[0] = cn10k_nix_tx_steor_vec_data(flags); + + pa = io_addr | (wd.data[0] & 0x7) << 4; + wd.data[0] &= ~0x7ULL; + + if (flags & NIX_TX_MULTI_SEG_F) + wd.data[0] <<= 16; + + wd.data[0] |= ((uint64_t)(lnum - 1)) << 12; + wd.data[0] |= lmt_id; /* STEOR0 */ - roc_lmt_submit_steorl(data, pa); + roc_lmt_submit_steorl(wd.data[0], pa); } left -= burst; @@ -1699,9 +2026,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (left) goto again; - if (unlikely(scalar)) - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd, - flags); + if (unlikely(scalar)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + scalar, cmd, flags); + else + pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, + cmd, flags); + } return pkts; } @@ -1866,7 +2198,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \ - void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ NIX_TX_FASTPATH_MODES #undef T diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c new file mode 100644 index 000000000..1fad81dba --- /dev/null +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_ethdev.h" +#include "cn10k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn10k_nix_xmit_pkts_vector( \ + tx_queue, tx_pkts, pkts, cmd, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index 735e21cc6..763f9a14f 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; - if (dev->scalar_ena) + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + + if (dev->scalar_ena) { pick_tx_func(eth_dev, nix_eth_tx_burst); - else + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + } else { pick_tx_func(eth_dev, nix_eth_tx_vec_burst); - - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg); + } rte_mb(); } diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index dca732a9f..ed65cd351 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1, } } +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd, + union nix_send_hdr_w0_u *sh, + union nix_send_sg_s *sg, const uint32_t flags) +{ + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint16_t nb_segs; + uint64_t segdw; + int i = 1; + + sh->total = m->pkt_len; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[0]; + + sg_u = sg_u | ((uint64_t)m->data_len); + + nb_segs = m->nb_segs - 1; + m_next = m->next; + + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << 55); + /* Mark mempool object as "put" since it is freed by NIX */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + + m = m_next; + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; + segdw = (uint64_t *)slist - (uint64_t *)&cmd[0]; + + segdw 
+= 2; + /* Roundup extra dwords to multiple of 2 */ + segdw = (segdw >> 1) + (segdw & 0x1); + /* Default dwords */ + segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + sh->sizem1 = segdw - 1; + + return segdw; +} + +static __rte_always_inline uint8_t +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0, + uint64x2_t *cmd1, const uint32_t flags) +{ + union nix_send_hdr_w0_u sh; + union nix_send_sg_s sg; + uint8_t ret; + + if (m->nb_segs == 1) { + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + sg.u = vgetq_lane_u64(cmd1[0], 0); + sg.u |= (cnxk_nix_prefree_seg(m) << 55); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + } + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sg.u = vgetq_lane_u64(cmd1[0], 0); + if (!(sg.u & (1ULL << 55))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); + rte_io_wmb(); +#endif + return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) + + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F); + } + + sh.u = vgetq_lane_u64(cmd0[0], 0); + sg.u = vgetq_lane_u64(cmd1[0], 0); + + ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags); + + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0); + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0); + return ret; +} + #define NIX_DESCS_PER_LOOP 4 + +static __rte_always_inline void +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1, + uint64x2_t *cmd2, uint64x2_t *cmd3, + uint8_t *segdw, + uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2], + uint64_t *lmt_addr, rte_iova_t io_addr, + const uint32_t flags) +{ + uint64_t lmt_status; + uint8_t j, off; + + if (!(flags & NIX_TX_NEED_EXT_HDR) && + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + /* No segments in 4 consecutive packets. */ + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + return; + } + } + + for (j = 0; j < NIX_DESCS_PER_LOOP;) { + /* Fit consecutive packets in same LMTLINE. 
*/ + if ((segdw[j] + segdw[j + 1]) <= 8) { +again0: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + + vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 14 + off, + slist[j + 1], segdw[j + 1] - 4); + off += ((segdw[j + 1] - 4) << 1); + vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]); + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 12 + off, + slist[j + 1], segdw[j + 1] - 3); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]); + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]); + roc_lmt_mov_seg(lmt_addr + 8 + off, + slist[j + 1], segdw[j + 1] - 2); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again0; + j += 2; + } else { +again1: + if ((flags & NIX_TX_NEED_EXT_HDR) && + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 4; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + off <<= 1; + vst1q_u64(lmt_addr + 6 + off, cmd3[j]); + } else if (flags & NIX_TX_NEED_EXT_HDR) { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd2[j]); + vst1q_u64(lmt_addr + 4, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 3; + roc_lmt_mov_seg(lmt_addr + 6, slist[j], off); + } else { + vst1q_u64(lmt_addr, cmd0[j]); + vst1q_u64(lmt_addr + 2, cmd1[j]); + /* Copy segs */ + off = segdw[j] - 2; + roc_lmt_mov_seg(lmt_addr + 4, slist[j], off); + } + lmt_status = roc_lmt_submit_ldeor(io_addr); + if (lmt_status == 0) + goto again1; + j += 1; + } + } +} + static __rte_always_inline uint16_t cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t *cmd, const uint16_t flags) @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sendext23_w0 = vld1q_u64(sx_w0 + 2); } - if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && + !(flags & NIX_TX_MULTI_SEG_F)) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); xmask23 = xmask01; @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, * cnxk_nix_prefree_seg are written before LMTST. 
*/ rte_io_wmb(); - } else { + } else if (!(flags & NIX_TX_MULTI_SEG_F)) { /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); } - if (flags & NIX_TX_NEED_EXT_HDR) { + if (flags & NIX_TX_MULTI_SEG_F) { + uint64_t seg_list[NIX_DESCS_PER_LOOP] + [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; + uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; + + /* Build mseg list for each packet individually. */ + for (j = 0; j < NIX_DESCS_PER_LOOP; j++) + segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j], + seg_list[j], &cmd0[j], + &cmd1[j], flags); + segdw[4] = 8; + + /* Commit all changes to mbuf before LMTST. */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + rte_io_wmb(); + + cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, + segdw, seg_list, + lmt_addr, io_addr, + flags); + } else if (flags & NIX_TX_NEED_EXT_HDR) { /* With ext header in the command we can no longer send * all 4 packets together since LMTLINE is 128bytes. * Split and Tx twice. @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } - if (unlikely(pkts_left)) - pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, - flags); + if (unlikely(pkts_left)) { + if (flags & NIX_TX_MULTI_SEG_F) + pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, + pkts_left, cmd, flags); + else + pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, + cmd, flags); + } return pkts; } @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ \ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ + \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c new file mode 100644 index 000000000..0256efd45 --- /dev/null +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_ethdev.h" +#include "cn9k_tx.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ + void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ + { \ + uint64_t cmd[sz]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ + (flags) | \ + NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build index aa8c7253f..361f7ce84 100644 --- a/drivers/net/cnxk/meson.build +++ b/drivers/net/cnxk/meson.build @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c', 'cn9k_rx_vec_mseg.c', 'cn9k_tx.c', 'cn9k_tx_mseg.c', - 'cn9k_tx_vec.c') + 'cn9k_tx_vec.c', + 'cn9k_tx_vec_mseg.c') # CN10K sources += files('cn10k_ethdev.c', 'cn10k_rte_flow.c', @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c', 'cn10k_rx_vec_mseg.c', 'cn10k_tx.c', 'cn10k_tx_mseg.c', - 'cn10k_tx_vec.c') + 'cn10k_tx_vec.c', + 'cn10k_tx_vec_mseg.c') deps += ['bus_pci', 'cryptodev', 'eventdev', 'security'] deps += ['common_cnxk', 'mempool_cnxk'] -- 2.17.1
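The NIX_SEGDW_MAGIC constant added to cn10k_tx.h above is a nibble-packed lookup table: entry n, four bits wide, gives the number of 16-byte dwords occupied by the SG region of an n-segment packet (one SG header word per three segment pointers, rounded up to whole dwords). A quick standalone decode of the table:

#include <stdint.h>
#include <stdio.h>

/* Same nibble-packed table as the patch. */
#define NIX_SEGDW_MAGIC		0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int
main(void)
{
	unsigned int n;

	/* Prints 1, 2, 2, 3, 4, 4, 5, 6, 6, 7 for nb_segs 1..10. */
	for (n = 1; n <= 10; n++)
		printf("nb_segs=%2u -> segdw=%u\n", n,
		       (unsigned int)NIX_NB_SEGS_TO_SEGDW(n));
	return 0;
}

The burst loop then adds one dword for the SEND_HDR and one more each when the EXT and MEM subdescriptors are in use, which matches the segdw[j] computation before the per-line dword counts are packed, three bits at a time, into the 128-bit word used for the STEOR submission.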
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize cn10k workslot fastpath structure to fit in 64B cacheline size. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v5 Changes: - Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset. v4 Changes: - Split patches for easier merge. v3 Changes: - Spell check. doc/guides/eventdevs/cnxk.rst | 28 ++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 540 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800c..b7e82c127 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Full Rx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +97,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across the ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +173,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to the +event device would cause back pressure to be asserted only on the first +ethernet device. +Back pressure is therefore disabled automatically when the same mempool is +used across all the ethernet devices connected to the event device; to +override this, applications can use the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device connected to the event device is +recommended. diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 31e49e1a5..3892c8017 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,6 +60,11 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. +* **Added support for Marvell CN9K and CN10K event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk.
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb6902795..76613fe84 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f..f17eba416 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df44..9dc0c88a6 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e5..cb1ce4b6f 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76..2060c8fe8 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } +static int 
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c6..5dbae275b 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd..c7250bf9e 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); 
} while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417ea..072800c24 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb..f5a440146 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 
+5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a7..cfd7fb971 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62..b65d725f5 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const 
struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c1..24bfd985e 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, + RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, i, port, &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, +
rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, (uint16_t)rx_queue_id, port, + &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + rxq_sp = eth_dev->data->rx_queues[i]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + } else { + rxq_sp = eth_dev->data->rx_queues[rx_queue_id]; + rxq_sp = rxq_sp - 1; + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a..eda562f5b 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
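Usage note: with the driver-side hooks above in place, an application drives this path entirely through the generic Rx adapter API. The sketch below is a minimal, illustrative setup and is not part of the patch; the IDs (adptr_id, evdev_id, eth_port, ev_queue) and the event port configuration are assumptions for the example. It maps every Rx queue of one ethdev port to a single event queue (the N:1 mapping advertised by this PMD) and overrides the flow ID when the PMD reports RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID:

#include <string.h>
#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
setup_rx_adapter(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port,
		 uint8_t ev_queue, struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port, &caps);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	if (caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) {
		/* Stamp a fixed flow ID instead of the HW flow tag. */
		qconf.rx_queue_flags =
			RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID;
		qconf.ev.flow_id = 0xbeef;
	}

	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, port_conf);
	if (rc)
		return rc;

	/* rx_queue_id of -1 adds all Rx queues of the port (N:1 mapping). */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port, -1,
						&qconf);
	if (rc)
		return rc;

	/* For INTERNAL_PORT capable PMDs such as this one, start/stop are
	 * driver no-ops (see cnxk_sso_rx_adapter_start/stop above) and no
	 * service core is consumed.
	 */
	return rte_event_eth_rx_adapter_start(adptr_id);
}

When the Rx mempool is shared with non-event consumers, NPA backpressure can additionally be forced on with the devarg added above, e.g. passing force_rx_bp=1 in the SSO device's devargs (the exact PCI BDF depends on the platform).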
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe8..ba7d95fff 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275b..c71aa3732 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e..b724083ca 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 000000000..36ec454cc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 000000000..29ecc551c --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 000000000..c8524a27b --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c24..e386cb784 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98d..538bc4b0b 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a440146..c01c00e1d 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 000000000..51ccaf4ec --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 000000000..4e2801459 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 000000000..9713d1ef0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 000000000..709fa2d9e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 000000000..d50e1cf83 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 000000000..a0508fdf0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f5..9d5d2d033 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b..c5c1c0ee8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
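The new ops above slot into the generic rte_event_eth_rx_adapter API. For reviewers who want to exercise this path end to end, a minimal application-side sketch follows; it is not part of the patch, and the ids (EVDEV_ID, ETH_PORT, RX_ADPTR) plus the atomic target queue are made up for illustration:

#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

#define EVDEV_ID 0 /* illustrative ids, not from this patch */
#define ETH_PORT 0
#define RX_ADPTR 0

static int
setup_rx_adapter(uint8_t ev_queue)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(EVDEV_ID, ETH_PORT, &caps);
	if (rc)
		return rc;
	/* cnxk reports CAP_INTERNAL_PORT, so no service core is needed;
	 * the NIX pushes packets straight into the SSO.
	 */
	rte_event_port_default_conf_get(EVDEV_ID, 0, &pconf);
	rc = rte_event_eth_rx_adapter_create(RX_ADPTR, EVDEV_ID, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	/* rx_queue_id of -1 maps all Rx queues of the port (N:1 mapping
	 * noted in the documentation).
	 */
	rc = rte_event_eth_rx_adapter_queue_add(RX_ADPTR, ETH_PORT, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(RX_ADPTR);
}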
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 21 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++ 6 files changed, 339 insertions(+), 6 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c127..6fdccc2ab 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017..80ff93269 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff..8a9b04a3d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = 
cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784..bdc563223 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int 
rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d033..458fdc8d9 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -86,9 +87,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +119,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +147,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 24bfd985e..548d7b81c 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -5,6 +5,8 @@ #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" +#define CNXK_SSO_SQB_LIMIT (0x180) + void cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, uint32_t event_type) @@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + uint16_t 
sqb_limit; + + sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs); + return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, txq); + if (ret < 0) + return ret; + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + } + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + sq = &cnxk_eth_dev->sqs[i]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, i, NULL); + if (ret < 0) + return ret; + } + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
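On top of this control path, an application drives Tx through the generic adapter calls; because the PMD reports RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT, workers submit with rte_event_eth_tx_adapter_enqueue() and no service core is involved. A rough sketch follows, with illustrative ids (EVDEV_ID, ETH_PORT, TX_ADPTR) and a fixed Tx queue 0 that are not part of the patch:

#include <rte_event_eth_tx_adapter.h>

#define EVDEV_ID 0 /* illustrative ids, matching the Rx sketch */
#define ETH_PORT 0
#define TX_ADPTR 0

static int
setup_tx_adapter(struct rte_event_port_conf *pconf)
{
	int rc;

	rc = rte_event_eth_tx_adapter_create(TX_ADPTR, EVDEV_ID, pconf);
	if (rc)
		return rc;
	/* tx_queue_id of -1 binds every Tx queue of the port; this is
	 * the path that caps the SQB aura to CNXK_SSO_SQB_LIMIT above.
	 */
	rc = rte_event_eth_tx_adapter_queue_add(TX_ADPTR, ETH_PORT, -1);
	if (rc)
		return rc;
	return rte_event_eth_tx_adapter_start(TX_ADPTR);
}

/* Worker side: tag the mbuf with its Tx queue, then hand the event to
 * the HW port. Atomic scheduling preserves receive order, per the
 * lockfree Tx note in the documentation.
 */
static void
worker_tx(uint8_t ev_port, struct rte_event *ev)
{
	rte_event_eth_tx_adapter_txq_set(ev->mbuf, 0);
	while (rte_event_eth_tx_adapter_enqueue(EVDEV_ID, ev_port, ev, 1,
						0) != 1)
		;
}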
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/meson.build | 6 ++ 11 files changed, 417 insertions(+) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3d..e462f770c 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083ca..3c90c8500 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ -251,4 +252,70 @@ uint16_t __rte_hot 
cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 000000000..f9968ac0d --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 000000000..a24fc42e5 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index bdc563223..af97020f2 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & 
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1d..5aa053c58 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 000000000..92e2981f0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 000000000..dfb574cf9 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 000000000..3df649c0c --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 000000000..0efe29113 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8..13e0634e8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
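The tables above follow the usual cnxk template pattern: one specialized function per offload-flag combination is stamped out by an X-macro, and dev->tx_offloads selects the instance with !!(flags & F) indexing, so every branch inside the fast path folds away at compile time. A tiny self-contained sketch of the same pattern; the MODES list, F_* flags and tx_* names are invented for illustration, not taken from the driver:

#include <stdint.h>
#include <stdio.h>

#define F_CSUM (1 << 0)
#define F_VLAN (1 << 1)

/* Mirrors NIX_TX_FASTPATH_MODES: name, table indices, constant flags. */
#define MODES                                                          \
	T(no_offload, 0, 0, 0)                                         \
	T(csum, 0, 1, F_CSUM)                                          \
	T(vlan, 1, 0, F_VLAN)                                          \
	T(vlan_csum, 1, 1, F_VLAN | F_CSUM)

/* Stamp out one function per mode; 'flags' is a compile-time constant,
 * so the untaken branches are eliminated in each instance.
 */
#define T(name, f1, f0, flags)                                         \
	static uint16_t tx_##name(void *pkt)                           \
	{                                                              \
		(void)pkt;                                             \
		if ((flags) & F_CSUM)                                  \
			puts("csum");                                  \
		if ((flags) & F_VLAN)                                  \
			puts("vlan");                                  \
		return 1;                                              \
	}
MODES
#undef T

typedef uint16_t (*tx_fn)(void *);

int
main(void)
{
	/* Re-expand the mode list to build the dispatch table. */
	const tx_fn tbl[2][2] = {
#define T(name, f1, f0, flags) [f1][f0] = tx_##name,
		MODES
#undef T
	};
	const uint64_t offloads = F_CSUM; /* runtime offload mask */

	return tbl[!!(offloads & F_VLAN)][!!(offloads & F_CSUM)](NULL) == 1
		       ? 0 : 1;
}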
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab..0297cd3d5 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c..e85fa4785 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 458fdc8d9..3783e0c95 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -96,6 +96,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 548d7b81c..c4c4f5a7f 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead0390..2528b3cda 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev 
*eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
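With these ops in place, an application enables vectorization through the 21.08 adapter API: query the PMD limits, create a pool of event vectors, then configure the queue, which must have been added with RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR set in rx_queue_flags. A rough sketch, assuming that API; the pool name and element count are arbitrary:

#include <errno.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_lcore.h>

static int
enable_rx_vectors(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port)
{
	struct rte_event_eth_rx_adapter_event_vector_config vconf;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	struct rte_mempool *vmp;
	int rc;

	rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
							&limits);
	if (rc)
		return rc;

	/* Pool of rte_event_vector containers sized to the PMD limits;
	 * 8192 is an arbitrary count for the sketch.
	 */
	vmp = rte_event_vector_pool_create("vec_pool", 8192, 0,
					   limits.max_sz, rte_socket_id());
	if (vmp == NULL)
		return -ENOMEM;

	memset(&vconf, 0, sizeof(vconf));
	vconf.vector_sz = limits.max_sz;
	vconf.vector_timeout_ns = limits.min_timeout_ns;
	vconf.vector_mp = vmp;
	/* rx_queue_id of -1 applies to all queues added to the adapter. */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		adptr_id, eth_port, -1, &vconf);
}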
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269..11ccc9bcb 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c8500..7a48a6b17 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e..a506a867c 
100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f..166735ad5 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c8..1f44ddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
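The vector fastpath above only assembles the rte_event_vector; draining it is left to the application. A minimal consumer-side sketch follows (not part of the patch); the function name worker_drain_vectors and the dev_id/port_id parameters are assumptions for illustration, and freeing stands in for real packet processing.

/* Illustrative application-side sketch, not from the patch: consuming
 * the RTE_EVENT_TYPE_ETHDEV_VECTOR events built by cn10k_process_vwqe().
 */
#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
worker_drain_vectors(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event ev;
	uint16_t i;

	while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
		if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			struct rte_event_vector *vec = ev.vec;

			/* vec->mbufs[] was rewritten in place from CQE
			 * pointers by the vector/scalar loops above.
			 */
			for (i = 0; i < vec->nb_elem; i++)
				rte_pktmbuf_free(vec->mbufs[i]);

			/* Return the vector to the pool it came from. */
			rte_mempool_put(rte_mempool_from_obj(vec), vec);
		} else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
			rte_pktmbuf_free(ev.mbuf);
		}
	}
}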
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath and integrate the event vector Tx routine into the Tx burst path. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 1 + doc/guides/rel_notes/release_21_08.rst | 2 +- drivers/common/cnxk/roc_sso.h | 23 ++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++---- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 12 files changed, 167 insertions(+), 53 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 0297cd3d5..53560d383 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are: - Full Rx/Tx offload support defined through ethdev queue configuration. - HW managed event vectorization on CN10K for packets enqueued from ethdev to eventdev configurable per each Rx queue in Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 11ccc9bcb..9e49cb27d 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,7 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. - * Add support for event vectorization for Rx adapter. + * Add support for event vectorization for Rx/Tx adapter. 
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8..316c6ccd5 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785..6f37c5bd2 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17..9cc099206 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 5aa053c58..ef1e83741 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae16..945132b74 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - uint64_t 
swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); -#else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) - ; -#endif -} #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59..0e1276c60 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index eb148b8e7..f75cae07a 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f675472..4ea4c8a4e 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e373750..a0350496a 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dba..7f98f79b9 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
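For reference, the meta decoding in cn10k_sso_hws_event_tx() (nb_elem in the low 16 bits, attr_valid at BIT(31), port at bits 32-47, queue at bits 48-63) is simply the first 64-bit word of struct rte_event_vector. A hedged application-side sketch of producing such an event is below; fwd_vector_to_tx and its parameters are illustrative names, not part of this series.

/* Sketch under stated assumptions: hand a vector whose mbufs all target
 * one (port, queue) pair to the Tx adapter, so the driver can take the
 * cn10k_nix_xmit_pkts_vector() path instead of the split-Tx fallback.
 */
#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

static inline void
fwd_vector_to_tx(uint8_t dev_id, uint8_t ev_port, struct rte_event *ev,
		 uint16_t eth_port, uint16_t tx_queue)
{
	struct rte_event_vector *vec = ev->vec;

	/* attr_valid is BIT(31) of the vector's first 64-bit word; it tells
	 * the driver that all mbufs share one destination.
	 */
	vec->attr_valid = 1;
	vec->port = eth_port;
	vec->queue = tx_queue;

	ev->event_type = RTE_EVENT_TYPE_ETHDEV_VECTOR;

	/* Retry until the adapter accepts the event. */
	while (!rte_event_eth_tx_adapter_enqueue(dev_id, ev_port, ev, 1, 0))
		;
}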
On Tue, Jun 29, 2021 at 1:14 PM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add multi-segment Rx vector routine, form the primary mbufs using > vector path switch to scalar path when extracting segments. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Series applied to dpdk-next-net-mrvl/for-dpdk-main. Thanks. > --- > v5 Changes: > - Fix incorrect mbuf assignment. > v4 Changes: > - Split patches for easier merge. > - Rebase on dpdk-next-net-mrvl. > v3 Changes: > - Spell check. > > drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ > drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ > drivers/net/cnxk/meson.build | 2 ++ > 7 files changed, 157 insertions(+), 44 deletions(-) > create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c > create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c > > diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c > index 5c956c06b..3a9fd7130 100644 > --- a/drivers/net/cnxk/cn10k_rx.c > +++ b/drivers/net/cnxk/cn10k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. > - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. 
> + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h > index 1cc37cbaa..5926ff7f4 100644 > --- a/drivers/net/cnxk/cn10k_rx.h > +++ b/drivers/net/cnxk/cn10k_rx.h > @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); > @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, > ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. 
> + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > new file mode 100644 > index 000000000..04d1e46c8 > --- /dev/null > +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > @@ -0,0 +1,17 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. > + */ > + > +#include "cn10k_ethdev.h" > +#include "cn10k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ > + { \ > + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ > + (flags) | NIX_RX_MULTI_SEG_F); \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c > index 0acedd0a1..d293d4eac 100644 > --- a/drivers/net/cnxk/cn9k_rx.c > +++ b/drivers/net/cnxk/cn9k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. > - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. 
> + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h > index 10ef5c690..5ae9e8195 100644 > --- a/drivers/net/cnxk/cn9k_rx.h > +++ b/drivers/net/cnxk/cn9k_rx.h > @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + > @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, > nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. 
> + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > \ > uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ > + \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > new file mode 100644 > index 000000000..e46d8a474 > --- /dev/null > +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. > + */ > + > +#include "cn9k_ethdev.h" > +#include "cn9k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ > + { \ > + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ > + (flags) | \ > + NIX_RX_MULTI_SEG_F); \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build > index 2071d0dcb..aa8c7253f 100644 > --- a/drivers/net/cnxk/meson.build > +++ b/drivers/net/cnxk/meson.build > @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', > 'cn9k_rx.c', > 'cn9k_rx_mseg.c', > 'cn9k_rx_vec.c', > + 'cn9k_rx_vec_mseg.c', > 'cn9k_tx.c', > 'cn9k_tx_mseg.c', > 'cn9k_tx_vec.c') > @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', > 'cn10k_rx.c', > 'cn10k_rx_mseg.c', > 'cn10k_rx_vec.c', > + 'cn10k_rx_vec_mseg.c', > 'cn10k_tx.c', > 'cn10k_tx_mseg.c', > 'cn10k_tx_vec.c') > -- > 2.17.1 >
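To summarize the selection logic applied above: the scalar (optionally multi-segment) burst is kept whenever scalar mode or PTP timestamping is requested, and the new vec_mseg tables are picked when scatter is enabled on the vector path. A small configuration sketch follows, assuming the 21.08-era DEV_RX_OFFLOAD_* flag names; configure_vec_mseg_rx is an illustrative helper, not part of the series.

/* Sketch: enable scatter so cn9k/cn10k_eth_set_rx_function() selects the
 * new nix_eth_rx_vec_burst_mseg[] table (scalar mode and PTP timestamping
 * left disabled).
 */
#include <string.h>
#include <rte_ethdev.h>

static int
configure_vec_mseg_rx(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	/* DEV_RX_OFFLOAD_SCATTER steers the burst-function selection to the
	 * multi-segment (NIX_RX_MULTI_SEG_F) variants built in this patch.
	 */
	conf.rxmode.offloads = DEV_RX_OFFLOAD_SCATTER;

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}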
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize the cn10k workslot fastpath structure to fit in a 64B cache line. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v6 Changes: - More code cleanup. - Fix incorrect SQB configuration and missing fc check. v5 Changes: - Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset. v4 Changes: - Split patches for easier merge. v3 Changes: - Spell check. doc/guides/eventdevs/cnxk.rst | 28 ++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 522 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800c..b7e82c127 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Full Rx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +97,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +173,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to +the event device causes back pressure to be asserted only on the first +ethernet device. +Back pressure is therefore disabled automatically when the same mempool +is shared across the ethernet devices connected to the event device; to +override this, applications can use the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device is recommended when they are +connected to the event device. diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 31e49e1a5..3892c8017 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,6 +60,11 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. +* **Added support for Marvell CN10K, CN9K, event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk. 
+ Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb6902795..76613fe84 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f..f17eba416 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df44..9dc0c88a6 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e5..cb1ce4b6f 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76..2060c8fe8 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } +static int 
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c6..5dbae275b 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd..c7250bf9e 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); 
} while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417ea..072800c24 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb..f5a440146 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 
+5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a7..cfd7fb971 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62..b65d725f5 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const 
struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c1..3b7ecb375 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, + i, queue_conf); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, (uint16_t)rx_queue_id, port, + &queue_conf->ev, + 
!!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a..eda562f5b 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
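For context on how an application drives the Rx adapter path added in the patch above, the sequence below uses only the public rte_event_eth_rx_adapter API. This is a minimal sketch, not part of the series: the adapter id (0), event queue id (0), RTE_SCHED_TYPE_ATOMIC and the caller-supplied port_conf are illustrative placeholders, and error handling is trimmed.

#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>

static int
app_setup_rx_adapter(uint8_t evdev_id, uint16_t eth_port,
                     struct rte_event_port_conf *port_conf)
{
        struct rte_event_eth_rx_adapter_queue_conf qconf;
        uint32_t caps;
        int rc;

        /* cnxk reports RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT for
         * net_cn9k/net_cn10k ethdevs (see the caps_get callbacks above),
         * so no service core is needed to inject packets into the SSO.
         */
        rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port, &caps);
        if (rc < 0)
                return rc;

        rc = rte_event_eth_rx_adapter_create(0, evdev_id, port_conf);
        if (rc < 0)
                return rc;

        memset(&qconf, 0, sizeof(qconf));
        qconf.ev.queue_id = 0;
        qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;

        /* rx_queue_id of -1 maps every Rx queue of the port to event
         * queue 0, the N:1 mapping advertised in the documentation.
         */
        rc = rte_event_eth_rx_adapter_queue_add(0, eth_port, -1, &qconf);
        if (rc < 0)
                return rc;

        return rte_event_eth_rx_adapter_start(0);
}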
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe8..ba7d95fff 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275b..c71aa3732 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e..b724083ca 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 000000000..36ec454cc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 000000000..29ecc551c --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 000000000..c8524a27b --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c24..e386cb784 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98d..538bc4b0b 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a440146..c01c00e1d 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 000000000..51ccaf4ec --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 000000000..4e2801459 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 000000000..9713d1ef0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 000000000..709fa2d9e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 000000000..d50e1cf83 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 000000000..a0508fdf0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f5..9d5d2d033 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b..c5c1c0ee8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
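The deq files above all follow the same template trick: NIX_RX_FASTPATH_MODES is an X-macro list with one entry per Rx-offload combination, and each .c file defines R() to stamp out one specialized dequeue function per entry. Because `flags` is a compile-time constant inside each expansion, the per-offload branches in cn9k_sso_hws_get_work() fold away, and cn9k_sso_fp_fns_set() later picks the right specialization by indexing a function-pointer table. A minimal standalone sketch of the pattern follows; the flag bits, mode list and names here are invented for illustration, not the driver's:

    #include <stdint.h>
    #include <stdio.h>

    /* Invented offload flag bits (the driver derives these from
     * NIX_RX_OFFLOAD_*).
     */
    #define DEMO_RX_RSS_F  (1 << 0)
    #define DEMO_RX_CSUM_F (1 << 1)

    /* X-macro list: one R() entry per offload combination, mirroring
     * NIX_RX_FASTPATH_MODES (which carries six flag columns, not two).
     */
    #define DEMO_RX_FASTPATH_MODES                                        \
    R(no_offload, 0, 0, 0)                                                \
    R(rss,        0, 1, DEMO_RX_RSS_F)                                    \
    R(csum,       1, 0, DEMO_RX_CSUM_F)                                   \
    R(csum_rss,   1, 1, DEMO_RX_CSUM_F | DEMO_RX_RSS_F)

    /* Stamp out one specialized function per mode; 'flags' is a
     * compile-time constant, so the branches below disappear at -O2.
     */
    #define R(name, f1, f0, flags)                                        \
    static uint16_t demo_deq_##name(void)                                 \
    {                                                                     \
        uint16_t work = 1;                                                \
                                                                          \
        if ((flags) & DEMO_RX_RSS_F)                                      \
            work += 1; /* RSS hash extraction would sit here */           \
        if ((flags) & DEMO_RX_CSUM_F)                                     \
            work += 2; /* checksum flag translation would sit here */     \
        return work;                                                      \
    }
    DEMO_RX_FASTPATH_MODES
    #undef R

    /* Reuse the same list to build the selection table, exactly like the
     * sso_hws_deq_* tables in cn9k_sso_fp_fns_set().
     */
    static uint16_t (*const demo_deq_fns[2][2])(void) = {
    #define R(name, f1, f0, flags) [f1][f0] = demo_deq_##name,
        DEMO_RX_FASTPATH_MODES
    #undef R
    };

    int main(void)
    {
        uint64_t offloads = DEMO_RX_CSUM_F; /* runtime configuration */

        printf("%u\n", (unsigned)demo_deq_fns[!!(offloads & DEMO_RX_CSUM_F)]
                                             [!!(offloads & DEMO_RX_RSS_F)]());
        return 0;
    }

The real tables are six-dimensional ([2][2][2][2][2][2]) because six offload flags are folded in, but the selection is the same `!!(offloads & FLAG)` indexing.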
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/common/cnxk/roc_nix.h | 1 + drivers/common/cnxk/roc_nix_queue.c | 7 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 22 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++ 8 files changed, 358 insertions(+), 9 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c127..6fdccc2ab 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017..80ff93269 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 76613fe84..822c1900e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -200,6 +200,7 @@ struct roc_nix_sq { uint64_t aura_handle; int16_t nb_sqb_bufs_adj; uint16_t nb_sqb_bufs; + uint16_t aura_sqb_bufs; plt_iova_t io_addr; void *lmt_addr; void *sqe_mem; diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c index 0604e7a18..f69771c15 100644 --- a/drivers/common/cnxk/roc_nix_queue.c +++ b/drivers/common/cnxk/roc_nix_queue.c @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) aura.fc_ena = 1; aura.fc_addr = (uint64_t)sq->fc; aura.fc_hyst_bits = 0; /* Store count on all updates */ - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura, + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura, &pool); if (rc) goto fail; - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz); + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz); if (sq->sqe_mem == NULL) { rc = NIX_ERR_NO_MEM; goto nomem; @@ -600,11 +600,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) /* Fill the initial buffers */ iova = (uint64_t)sq->sqe_mem; - for (count = 0; count < nb_sqb_bufs; count++) { + for (count = 0; count < NIX_MAX_SQB; count++) { roc_npa_aura_op_free(sq->aura_handle, 0, iova); iova += blk_sz; } roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova); + sq->aura_sqb_bufs = NIX_MAX_SQB; return rc; nomem: diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff..8a9b04a3d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, 
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784..21f80323d 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * 
(dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static void +cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id, + bool ena) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cn9k_eth_txq *txq; + struct roc_nix_sq *sq; + int i; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cn9k_sso_txq_fc_update(eth_dev, i, ena); + } else { + uint16_t sq_limit; + + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq_limit = + ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) : + sq->nb_sqb_bufs; + txq->nb_sqb_bufs_adj = + sq_limit - + RTE_ALIGN_MUL_CEIL(sq_limit, + (1ULL << txq->sqes_per_sqb_log2)) / + (1ULL << txq->sqes_per_sqb_log2); + txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100; + } +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true); + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false); + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d033..24e1be6a9 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include 
<rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -34,6 +35,7 @@ #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) #define CNXK_SSO_WQE_SG_PTR (9) +#define CNXK_SSO_SQB_LIMIT (0x180) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) @@ -86,9 +88,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +120,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +148,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 3b7ecb375..502da272d 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + return roc_npa_aura_limit_modify( + sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs)); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct 
cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i); + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + } + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i); + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
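A note on the data structure introduced here: dev->tx_adptr_data is one flat uint64_t array sized (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT, grown with rte_realloc_socket() whenever a higher port id shows up, and addressed through a cast to a 2D array type so the fast path can fetch a txq with a single [port][queue] load. A reduced sketch of that scheme, using plain realloc() and an invented queue count:

    #include <stdint.h>
    #include <stdlib.h>

    #define DEMO_MAX_QUEUES_PER_PORT 64 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

    /* Flat (port, queue) -> txq-pointer table, shaped like
     * cnxk_sso_updt_tx_queue_data(). Slots never written stay
     * uninitialized, as with the driver's rte_realloc_socket() version;
     * only queues that were added are ever read back.
     */
    struct demo_txq_table {
        uint64_t *data; /* (max_port_id + 1) * DEMO_MAX_QUEUES_PER_PORT */
        uint16_t max_port_id;
    };

    static int
    demo_txq_set(struct demo_txq_table *t, uint16_t port, uint16_t queue,
                 void *txq)
    {
        if (t->data == NULL || port > t->max_port_id) {
            uint16_t max_port =
                port > t->max_port_id ? port : t->max_port_id;
            uint64_t *nd = realloc(t->data,
                                   sizeof(uint64_t) * (max_port + 1) *
                                       DEMO_MAX_QUEUES_PER_PORT);

            if (nd == NULL)
                return -1;
            t->data = nd;
            t->max_port_id = max_port;
        }
        /* View the flat array as uint64_t[ports][queues], as the driver
         * does with (uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])txq_data.
         */
        ((uint64_t(*)[DEMO_MAX_QUEUES_PER_PORT])t->data)[port][queue] =
            (uint64_t)(uintptr_t)txq;
        return 0;
    }

    static void *
    demo_txq_get(const struct demo_txq_table *t, uint16_t port,
                 uint16_t queue)
    {
        return (void *)(uintptr_t)(
            ((const uint64_t(*)[DEMO_MAX_QUEUES_PER_PORT])t->data)
                [port][queue]);
    }

cn9k/cn10k_sso_updt_tx_adptr_data() then memcpy this table into the flexible tx_adptr_data[] member trailing each workslot, so the enqueue fast path reads a per-worker copy instead of shared state; that is also why the copy is refreshed in start() and on every queue add/del.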
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cnxk_worker.h | 27 +++--- drivers/event/cnxk/meson.build | 6 ++ 12 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3d..e462f770c 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083ca..3c90c8500 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ 
-251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 000000000..f9968ac0d --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 000000000..a24fc42e5 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 21f80323d..a69edff19 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & 
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1d..3f9751211 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline void +cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq) +{ + while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) + << (txq)->sqes_per_sqb_log2)) + ; +} + +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 000000000..92e2981f0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 000000000..dfb574cf9 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 000000000..3df649c0c --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 000000000..0efe29113 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae16..7891b749d 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -79,21 +79,20 @@ static __rte_always_inline void cnxk_sso_hws_head_wait(uintptr_t tag_op) { #ifdef RTE_ARCH_ARM64 - uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); + uint64_t tag; + + asm volatile(" ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); #else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) + /* Wait for the HEAD to be set */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) ; #endif } diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8..13e0634e8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 
'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
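The ordering guarantee in cn9k/cn10k_sso_hws_event_tx() comes from the reworked cnxk_sso_hws_head_wait(): before an ORDERED event's packet is submitted, the workslot now waits until bit 35 of the GWS tag word is set, i.e. until the event has reached the head of its flow (the previous code polled the SWTAG-pending bit with the opposite polarity). Once the descriptor is submitted, cnxk_sso_hws_swtag_flush() releases the tag so the next event in the flow can become head. A reduced C sketch of the gate, with the memory-mapped register replaced by a plain word:

    #include <stdbool.h>
    #include <stdint.h>

    #define DEMO_TAG_HEAD_BIT 35 /* HEAD bit in SSOW_LF_GWS_TAG */

    /* Spin until this workslot's event is head of its ordered flow. The
     * driver's arm64 version uses sevl/wfe instead of busy-polling.
     */
    static inline void
    demo_head_wait(const volatile uint64_t *tag_op)
    {
        while (!(*tag_op & (1ULL << DEMO_TAG_HEAD_BIT)))
            ;
    }

    /* Shape of the ordered-Tx path: gate, submit, release. */
    static void
    demo_event_tx(volatile uint64_t *tag_op, bool ordered)
    {
        if (ordered)
            demo_head_wait(tag_op); /* preserve per-flow packet order */

        /* ... write the NIX send descriptor and ring the doorbell ... */

        /* ... a swtag_flush here lets the next event become head ... */
    }

This is what lets the PMD advertise ``DEV_TX_OFFLOAD_MT_LOCKFREE`` while keeping receive order: ordered flows serialize at the head wait, while ATOMIC and UNTAGGED events go straight to submission.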
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab..0297cd3d5 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c..e85fa4785 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 24e1be6a9..fc49b88d6 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -97,6 +97,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 502da272d..baf2f2aa6 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead0390..2528b3cda 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev 
*eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
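The vector limits and cnxk_sso_rx_adapter_vwqe_enable() conversions above are unit arithmetic: the NIX aggregates packets for at most vwqe_wait_tmo steps of (roc_nix_get_vwqe_interval() + 1) * 100 ns each, and stores the vector size as a log2 exponent. A sketch of those conversions; the 100 ns step and the 9-bit wait field (implied by BITMASK_ULL(8, 0)) are read off the patch and should be treated as assumptions rather than hardware documentation:

    #include <stdint.h>

    #define DEMO_VWQE_WAIT_BITS 9 /* wait field: 0 .. (1 << 9) - 1 steps */

    /* ns per hardware aggregation step */
    static uint64_t
    demo_vwqe_step_ns(uint32_t vwqe_interval)
    {
        return ((uint64_t)vwqe_interval + 1) * 100;
    }

    /* rq->vwqe_wait_tmo: requested ns, truncated to whole steps */
    static uint64_t
    demo_vwqe_wait_tmo(uint64_t tmo_ns, uint32_t vwqe_interval)
    {
        return tmo_ns / demo_vwqe_step_ns(vwqe_interval);
    }

    /* limits->max_timeout_ns: all-ones wait field, in ns */
    static uint64_t
    demo_vwqe_max_timeout_ns(uint32_t vwqe_interval)
    {
        return (((uint64_t)1 << DEMO_VWQE_WAIT_BITS) - 1) *
               demo_vwqe_step_ns(vwqe_interval);
    }

    /* rq->vwqe_max_sz_exp: vector size stored as a power-of-two exponent
     * (the driver uses rte_log2_u32(); sz must already be a power of two
     * inside the advertised min/max).
     */
    static uint32_t
    demo_vwqe_sz_exp(uint32_t sz)
    {
        uint32_t exp = 0;

        while ((1u << exp) < sz)
            exp++;
        return exp;
    }

So with vwqe_interval = 0 the timeout granularity is 100 ns and the largest configurable timeout is 511 steps; cn10k_sso_rx_adapter_vector_config() additionally packs RTE_EVENT_TYPE_ETHDEV_VECTOR and the port id into rq->tag_mask so completed vectors surface as events of that type.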
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269..11ccc9bcb 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c8500..7a48a6b17 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e..a506a867c 
100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f..166735ad5 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c8..1f44ddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
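A note on the structure of the Rx patch above: the poll-mode and the event-vector (VWQE) receive paths now share cn10k_nix_recv_pkts_vector(), and the main difference between them is how descriptor words are addressed. The sketch below restates the CQE_PTR_OFF idea as standalone C; the macro and helper names here are illustrative stand-ins, not the driver's own.

#include <stdint.h>

#define CQ_ENTRY_SZ  128	/* CNXK_NIX_CQ_ENTRY_SZ */
#define RX_VWQE_FLAG (1 << 14)	/* stands in for NIX_RX_VWQE_F */

/* Poll mode: "base" is the CQ ring itself and entry i sits at a fixed
 * 128B stride. VWQE mode: "base" is the array of WQE pointers handed
 * over by the SSO and entry i is reached through base[i].
 */
static inline const uint64_t *
desc_word(const void *base, uint16_t i, uint16_t off, const uint16_t flags)
{
	if (flags & RX_VWQE_FLAG)
		return (const uint64_t *)(((const uintptr_t *)base)[i] + off);
	return (const uint64_t *)((uintptr_t)base +
				  (uintptr_t)i * CQ_ENTRY_SZ + off);
}

Since "flags" is a compile-time constant in every fast-path variant generated by NIX_RX_FASTPATH_MODES, the branch folds away at build time, which is why CQE_PTR_OFF can be used unconditionally inside the hot loop.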
From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Tx event vector fastpath, integrate event vector Tx routine into
Tx burst.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          |   1 +
 doc/guides/rel_notes/release_21_08.rst |   2 +-
 drivers/common/cnxk/roc_sso.h          |  23 ++++
 drivers/event/cnxk/cn10k_eventdev.c    |   3 +-
 drivers/event/cnxk/cn10k_worker.h      | 104 +++++++++++++++++++++++--
 drivers/event/cnxk/cn9k_worker.h       |   4 +-
 drivers/event/cnxk/cnxk_worker.h       |  22 ------
 drivers/net/cnxk/cn10k_tx.c            |   2 +-
 drivers/net/cnxk/cn10k_tx.h            |  52 +++++++++----
 drivers/net/cnxk/cn10k_tx_mseg.c       |   3 +-
 drivers/net/cnxk/cn10k_tx_vec.c        |   2 +-
 drivers/net/cnxk/cn10k_tx_vec_mseg.c   |   2 +-
 12 files changed, 167 insertions(+), 53 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
 - Full Rx/Tx offload support defined through ethdev queue configuration.
 - HW managed event vectorization on CN10K for packets enqueued from ethdev to
   eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
 
 Prerequisites and Compilation procedure
 ---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
   * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
     is net/cnxk.
 
-  * Add support for event vectorization for Rx adapter.
+  * Added support for event vectorization for Rx/Tx adapter.
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8..316c6ccd5 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785..6f37c5bd2 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17..9cc099206 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 3f9751211..cc1e14195 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 7891b749d..9f9ceab8a 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - 
uint64_t tag; - - asm volatile(" ldr %[tag], [%[tag_op]] \n" - " tbnz %[tag], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[tag], [%[tag_op]] \n" - " tbz %[tag], 35, rty%= \n" - "done%=: \n" - : [tag] "=&r"(tag) - : [tag_op] "r"(tag_op)); -#else - /* Wait for the HEAD to be set */ - while (!(plt_read64(tag_op) & BIT_ULL(35))) - ; -#endif -} - #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59..0e1276c60 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index eb148b8e7..f75cae07a 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64_t data[2]; } wd; - 
NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f675472..4ea4c8a4e 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e373750..a0350496a 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dba..7f98f79b9 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
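For context on the Tx patch above: the fast path treats the head of struct rte_event_vector as a single 64-bit word. Per rte_eventdev.h the leading fields are nb_elem (bits 0-15), a reserved run, attr_valid (bit 31) and, when attr_valid is set, the shared port (bits 32-47) and queue (bits 48-63). Below is a minimal sketch of that decode, with an explicit mask added on the port bits for clarity; the helper name is illustrative, not the driver's.

#include <stdint.h>
#include <rte_eventdev.h>

/* Returns nb_elem; fills *port/*queue only when every mbuf in the
 * vector shares one Tx destination (attr_valid set).
 */
static inline uint16_t
tx_vector_decode(const struct rte_event_vector *vec,
		 uint16_t *port, uint16_t *queue)
{
	const uint64_t meta = *(const uint64_t *)vec;

	if (meta & (1ULL << 31)) {	/* attr_valid */
		*port = (meta >> 32) & 0xFFFF;
		*queue = (meta >> 48) & 0xFFFF;
	} else {
		*port = *queue = UINT16_MAX;	/* per-mbuf attributes */
	}
	return meta & 0xFFFF;		/* nb_elem */
}

With attr_valid set the whole vector goes straight to cn10k_nix_xmit_pkts_vector(); otherwise cn10k_sso_vwqe_split_tx() walks the mbufs four at a time. Either way the vector container itself is returned to its mempool with rte_mempool_put() once the mbufs are queued.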
On Sat, Jul 03, 2021 at 02:44:04AM +0530, pbhagavatula@marvell.com wrote: > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add support for event eth Tx adapter. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > doc/guides/eventdevs/cnxk.rst | 4 +- > doc/guides/rel_notes/release_21_08.rst | 6 +- > drivers/common/cnxk/roc_nix.h | 1 + > drivers/common/cnxk/roc_nix_queue.c | 7 +- > drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++ > drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++ > drivers/event/cnxk/cnxk_eventdev.h | 22 +++- > drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++ > 8 files changed, 358 insertions(+), 9 deletions(-) > > diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst > index b7e82c127..6fdccc2ab 100644 > --- a/doc/guides/eventdevs/cnxk.rst > +++ b/doc/guides/eventdevs/cnxk.rst > @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: > - HW managed packets enqueued from ethdev to eventdev exposed through event eth > RX adapter. > - N:1 ethernet device Rx queue to Event queue mapping. > -- Full Rx offload support defined through ethdev queue configuration. > +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` > + capability while maintaining receive packet order. > +- Full Rx/Tx offload support defined through ethdev queue configuration. > > Prerequisites and Compilation procedure > --------------------------------------- > diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst > index 3892c8017..80ff93269 100644 > --- a/doc/guides/rel_notes/release_21_08.rst > +++ b/doc/guides/rel_notes/release_21_08.rst > @@ -60,10 +60,10 @@ New Features > * Added net/cnxk driver which provides the support for the integrated ethernet > device. > > -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** > +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** > > - * Added Rx adapter support for event/cnxk when the ethernet device requested is > - net/cnxk. > + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested > + is net/cnxk. 
> > > Removed Items > diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h > index 76613fe84..822c1900e 100644 > --- a/drivers/common/cnxk/roc_nix.h > +++ b/drivers/common/cnxk/roc_nix.h > @@ -200,6 +200,7 @@ struct roc_nix_sq { > uint64_t aura_handle; > int16_t nb_sqb_bufs_adj; > uint16_t nb_sqb_bufs; > + uint16_t aura_sqb_bufs; > plt_iova_t io_addr; > void *lmt_addr; > void *sqe_mem; > diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c > index 0604e7a18..f69771c15 100644 > --- a/drivers/common/cnxk/roc_nix_queue.c > +++ b/drivers/common/cnxk/roc_nix_queue.c > @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) > aura.fc_ena = 1; > aura.fc_addr = (uint64_t)sq->fc; > aura.fc_hyst_bits = 0; /* Store count on all updates */ > - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura, > + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura, > &pool); > if (rc) > goto fail; > > - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz); > + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz); > if (sq->sqe_mem == NULL) { > rc = NIX_ERR_NO_MEM; > goto nomem; > @@ -600,11 +600,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) > > /* Fill the initial buffers */ > iova = (uint64_t)sq->sqe_mem; > - for (count = 0; count < nb_sqb_bufs; count++) { > + for (count = 0; count < NIX_MAX_SQB; count++) { > roc_npa_aura_op_free(sq->aura_handle, 0, iova); > iova += blk_sz; > } > roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova); > + sq->aura_sqb_bufs = NIX_MAX_SQB; Since now aura is created with NIX_MAX_SQB's, we need to also modify the aura limit here to sq->nb_sqb_bufs for poll mode ? With this fixed, Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > > return rc; > nomem: > diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c > index ba7d95fff..8a9b04a3d 100644 > --- a/drivers/event/cnxk/cn10k_eventdev.c > +++ b/drivers/event/cnxk/cn10k_eventdev.c > @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) > /* First cache line is reserved for cookie */ > ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); > ws->base = roc_sso_hws_base_get(&dev->sso, port_id); > + ws->tx_base = ws->base; > ws->hws_id = port_id; > ws->swtag_req = 0; > ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); > @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) > return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); > } > > +static int > +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) > +{ > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + int i; > + > + if (dev->tx_adptr_data == NULL) > + return 0; > + > + for (i = 0; i < dev->nb_event_ports; i++) { > + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; > + void *ws_cookie; > + > + ws_cookie = cnxk_sso_hws_get_cookie(ws); > + ws_cookie = rte_realloc_socket( > + ws_cookie, > + sizeof(struct cnxk_sso_hws_cookie) + > + sizeof(struct cn10k_sso_hws) + > + (sizeof(uint64_t) * (dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT), > + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); > + if (ws_cookie == NULL) > + return -ENOMEM; > + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); > + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, > + sizeof(uint64_t) * (dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT); > + event_dev->data->ports[i] = ws; > + } > + > + return 0; > +} > + > static void > 
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) > { > @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) > { > int rc; > > + rc = cn10k_sso_updt_tx_adptr_data(event_dev); > + if (rc < 0) > + return rc; > + > rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, > cn10k_sso_hws_flush_events); > if (rc < 0) > @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, > return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); > } > > +static int > +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, > + const struct rte_eth_dev *eth_dev, uint32_t *caps) > +{ > + int ret; > + > + RTE_SET_USED(dev); > + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); > + if (ret) > + *caps = 0; > + else > + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; > + > + return 0; > +} > + > +static int > +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + int rc; > + > + RTE_SET_USED(id); > + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); > + if (rc < 0) > + return rc; > + rc = cn10k_sso_updt_tx_adptr_data(event_dev); > + if (rc < 0) > + return rc; > + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); > + > + return 0; > +} > + > +static int > +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + int rc; > + > + RTE_SET_USED(id); > + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); > + if (rc < 0) > + return rc; > + return cn10k_sso_updt_tx_adptr_data(event_dev); > +} > + > static struct rte_eventdev_ops cn10k_sso_dev_ops = { > .dev_infos_get = cn10k_sso_info_get, > .dev_configure = cn10k_sso_dev_configure, > @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { > .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, > .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, > > + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, > + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, > + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, > + > .timer_adapter_caps_get = cnxk_tim_caps_get, > > .dump = cnxk_sso_dump, > diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c > index e386cb784..21f80323d 100644 > --- a/drivers/event/cnxk/cn9k_eventdev.c > +++ b/drivers/event/cnxk/cn9k_eventdev.c > @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) > return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); > } > > +static int > +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) > +{ > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + int i; > + > + if (dev->tx_adptr_data == NULL) > + return 0; > + > + for (i = 0; i < dev->nb_event_ports; i++) { > + if (dev->dual_ws) { > + struct cn9k_sso_hws_dual *dws = > + event_dev->data->ports[i]; > + void *ws_cookie; > + > + ws_cookie = cnxk_sso_hws_get_cookie(dws); > + ws_cookie = rte_realloc_socket( > + ws_cookie, > + sizeof(struct cnxk_sso_hws_cookie) + > + sizeof(struct cn9k_sso_hws_dual) + > + (sizeof(uint64_t) * > + (dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT), > + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); > + if (ws_cookie == NULL) > + return -ENOMEM; > + dws = RTE_PTR_ADD(ws_cookie, > + sizeof(struct cnxk_sso_hws_cookie)); > + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, > + sizeof(uint64_t) * 
(dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT); > + event_dev->data->ports[i] = dws; > + } else { > + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; > + void *ws_cookie; > + > + ws_cookie = cnxk_sso_hws_get_cookie(ws); > + ws_cookie = rte_realloc_socket( > + ws_cookie, > + sizeof(struct cnxk_sso_hws_cookie) + > + sizeof(struct cn9k_sso_hws_dual) + > + (sizeof(uint64_t) * > + (dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT), > + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); > + if (ws_cookie == NULL) > + return -ENOMEM; > + ws = RTE_PTR_ADD(ws_cookie, > + sizeof(struct cnxk_sso_hws_cookie)); > + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, > + sizeof(uint64_t) * (dev->max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT); > + event_dev->data->ports[i] = ws; > + } > + } > + rte_mb(); > + > + return 0; > +} > + > static void > cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) > { > @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) > { > int rc; > > + rc = cn9k_sso_updt_tx_adptr_data(event_dev); > + if (rc < 0) > + return rc; > + > rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, > cn9k_sso_hws_flush_events); > if (rc < 0) > @@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, > return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); > } > > +static int > +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, > + const struct rte_eth_dev *eth_dev, uint32_t *caps) > +{ > + int ret; > + > + RTE_SET_USED(dev); > + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); > + if (ret) > + *caps = 0; > + else > + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; > + > + return 0; > +} > + > +static void > +cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id, > + bool ena) > +{ > + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; > + struct cn9k_eth_txq *txq; > + struct roc_nix_sq *sq; > + int i; > + > + if (tx_queue_id < 0) { > + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) > + cn9k_sso_txq_fc_update(eth_dev, i, ena); > + } else { > + uint16_t sq_limit; > + > + sq = &cnxk_eth_dev->sqs[tx_queue_id]; > + txq = eth_dev->data->tx_queues[tx_queue_id]; > + sq_limit = > + ena ? 
RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) : > + sq->nb_sqb_bufs; > + txq->nb_sqb_bufs_adj = > + sq_limit - > + RTE_ALIGN_MUL_CEIL(sq_limit, > + (1ULL << txq->sqes_per_sqb_log2)) / > + (1ULL << txq->sqes_per_sqb_log2); > + txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100; > + } > +} > + > +static int > +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + int rc; > + > + RTE_SET_USED(id); > + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); > + if (rc < 0) > + return rc; > + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true); > + rc = cn9k_sso_updt_tx_adptr_data(event_dev); > + if (rc < 0) > + return rc; > + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); > + > + return 0; > +} > + > +static int > +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + int rc; > + > + RTE_SET_USED(id); > + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); > + if (rc < 0) > + return rc; > + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false); > + return cn9k_sso_updt_tx_adptr_data(event_dev); > +} > + > static struct rte_eventdev_ops cn9k_sso_dev_ops = { > .dev_infos_get = cn9k_sso_info_get, > .dev_configure = cn9k_sso_dev_configure, > @@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { > .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, > .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, > > + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, > + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, > + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, > + > .timer_adapter_caps_get = cnxk_tim_caps_get, > > .dump = cnxk_sso_dump, > diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h > index 9d5d2d033..24e1be6a9 100644 > --- a/drivers/event/cnxk/cnxk_eventdev.h > +++ b/drivers/event/cnxk/cnxk_eventdev.h > @@ -8,6 +8,7 @@ > #include <rte_devargs.h> > #include <rte_ethdev.h> > #include <rte_event_eth_rx_adapter.h> > +#include <rte_event_eth_tx_adapter.h> > #include <rte_kvargs.h> > #include <rte_mbuf_pool_ops.h> > #include <rte_pci.h> > @@ -34,6 +35,7 @@ > #define CNXK_SSO_XAQ_CACHE_CNT (0x7) > #define CNXK_SSO_XAQ_SLACK (8) > #define CNXK_SSO_WQE_SG_PTR (9) > +#define CNXK_SSO_SQB_LIMIT (0x180) > > #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) > #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) > @@ -86,9 +88,12 @@ struct cnxk_sso_evdev { > rte_iova_t fc_iova; > struct rte_mempool *xaq_pool; > uint64_t rx_offloads; > + uint64_t tx_offloads; > uint64_t adptr_xae_cnt; > uint16_t rx_adptr_pool_cnt; > uint64_t *rx_adptr_pools; > + uint64_t *tx_adptr_data; > + uint16_t max_port_id; > uint16_t tim_adptr_ring_cnt; > uint16_t *timer_adptr_rings; > uint64_t *timer_adptr_sz; > @@ -115,7 +120,10 @@ struct cn10k_sso_hws { > uint64_t xaq_lmt __rte_cache_aligned; > uint64_t *fc_mem; > uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; > + /* Tx Fastpath data */ > + uint64_t tx_base __rte_cache_aligned; > uintptr_t lmt_base; > + uint8_t tx_adptr_data[]; > } __rte_cache_aligned; > > /* CN9K HWS ops */ > @@ -140,7 +148,9 @@ struct cn9k_sso_hws { > uint64_t xaq_lmt __rte_cache_aligned; > uint64_t *fc_mem; > uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; > - uint64_t base; > + /* Tx Fastpath data */ > + uint64_t base __rte_cache_aligned; > + uint8_t tx_adptr_data[]; > } 
__rte_cache_aligned; > > struct cn9k_sso_hws_state { > @@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual { > uint64_t xaq_lmt __rte_cache_aligned; > uint64_t *fc_mem; > uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; > - uint64_t base[2]; > + /* Tx Fastpath data */ > + uint64_t base[2] __rte_cache_aligned; > + uint8_t tx_adptr_data[]; > } __rte_cache_aligned; > > struct cnxk_sso_hws_cookie { > @@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, > const struct rte_eth_dev *eth_dev); > int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, > const struct rte_eth_dev *eth_dev); > +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id); > +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id); > > #endif /* __CNXK_EVENTDEV_H__ */ > diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c > index 3b7ecb375..502da272d 100644 > --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c > +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c > @@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, > > return 0; > } > + > +static int > +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) > +{ > + return roc_npa_aura_limit_modify( > + sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs)); > +} > + > +static int > +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, > + uint16_t eth_port_id, uint16_t tx_queue_id, > + void *txq) > +{ > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + uint16_t max_port_id = dev->max_port_id; > + uint64_t *txq_data = dev->tx_adptr_data; > + > + if (txq_data == NULL || eth_port_id > max_port_id) { > + max_port_id = RTE_MAX(max_port_id, eth_port_id); > + txq_data = rte_realloc_socket( > + txq_data, > + (sizeof(uint64_t) * (max_port_id + 1) * > + RTE_MAX_QUEUES_PER_PORT), > + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); > + if (txq_data == NULL) > + return -ENOMEM; > + } > + > + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) > + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; > + dev->max_port_id = max_port_id; > + dev->tx_adptr_data = txq_data; > + return 0; > +} > + > +int > +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + struct roc_nix_sq *sq; > + int i, ret; > + void *txq; > + > + if (tx_queue_id < 0) { > + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) > + cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i); > + } else { > + txq = eth_dev->data->tx_queues[tx_queue_id]; > + sq = &cnxk_eth_dev->sqs[tx_queue_id]; > + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); > + ret = cnxk_sso_updt_tx_queue_data( > + event_dev, eth_dev->data->port_id, tx_queue_id, txq); > + if (ret < 0) > + return ret; > + > + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; > + } > + > + return 0; > +} > + > +int > +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, > + const struct rte_eth_dev *eth_dev, > + int32_t tx_queue_id) > +{ > + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; > + struct roc_nix_sq *sq; > + int i, ret; > + > + RTE_SET_USED(event_dev); > + if (tx_queue_id < 0) { > + for (i = 0; i < 
eth_dev->data->nb_tx_queues; i++) > + cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i); > + } else { > + sq = &cnxk_eth_dev->sqs[tx_queue_id]; > + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); > + ret = cnxk_sso_updt_tx_queue_data( > + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); > + if (ret < 0) > + return ret; > + } > + > + return 0; > +} > -- > 2.17.1 >
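To make the SQB point in the review above concrete: the aura is now created with NIX_MAX_SQB buffers, the adapter clamps the usable count via roc_npa_aura_limit_modify(), and cn9k_sso_txq_fc_update() derives the per-queue flow-control threshold from whichever limit is active. The arithmetic reduces to the sketch below, noting that RTE_ALIGN_MUL_CEIL(x, m) / m is just a ceiling division; the helper name is illustrative.

#include <stdint.h>

/* From an SQB budget, drop the share consumed by the per-SQB
 * next-buffer link words, then keep 70% of the remainder as the
 * threshold (NIX_SQB_LOWER_THRESH in roc_nix_priv.h).
 */
static inline uint16_t
sqb_fc_threshold(uint16_t sq_limit, uint8_t sqes_per_sqb_log2)
{
	const uint32_t sqes_per_sqb = 1u << sqes_per_sqb_log2;
	const uint32_t link_cost =
		(sq_limit + sqes_per_sqb - 1) / sqes_per_sqb;

	return (uint16_t)((70 * (sq_limit - link_cost)) / 100);
}

Here sq_limit is RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) while the queue is attached to the Tx adapter and sq->nb_sqb_bufs once it is detached; restoring the aura limit to sq->nb_sqb_bufs on detach is exactly the poll-mode concern raised in the review.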
From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter. Resize the cn10k workslot fastpath
structure to fit in a 64B cacheline.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v7 Changes:
 - Set correct limits for SQB aura.

 v6 Changes:
 - More code cleanup.
 - Fix incorrect SQB configuration and missing fc check.

 v5 Changes:
 - Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.

 v4 Changes:
 - Split patches for easier merge.

 v3 Changes:
 - Spell check.

 doc/guides/eventdevs/cnxk.rst            |  28 ++++
 doc/guides/rel_notes/release_21_08.rst   |   5 +
 drivers/common/cnxk/roc_nix.h            |   3 +
 drivers/common/cnxk/roc_nix_fc.c         |  78 +++++++++++
 drivers/common/cnxk/roc_nix_priv.h       |   3 +-
 drivers/common/cnxk/version.map          |   1 +
 drivers/event/cnxk/cn10k_eventdev.c      | 107 ++++++++++++---
 drivers/event/cnxk/cn10k_worker.c        |   7 +-
 drivers/event/cnxk/cn10k_worker.h        |  32 +++--
 drivers/event/cnxk/cn9k_eventdev.c       |  89 +++++++++++++
 drivers/event/cnxk/cn9k_worker.h         |   4 +
 drivers/event/cnxk/cnxk_eventdev.c       |   2 +
 drivers/event/cnxk/cnxk_eventdev.h       |  43 ++++--
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++
 drivers/event/cnxk/meson.build           |   9 +-
 15 files changed, 522 insertions(+), 47 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
   time granularity of 2.5us on CN9K and 1us on CN10K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+  RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
 
 Prerequisites and Compilation procedure
 ---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50-50]
 
+- ``Force Rx Back pressure``
+
+  Force Rx back pressure when the same mempool is used across the ethernet
+  devices connected to the event device.
+
+  For example::
+
+    -a 0002:0e:00.0,force_rx_bp=1
+
 - ``TIM disable NPA``
 
   By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
    +---+------------+-------------------------------------------------------+
    | 2 | TIM        | --log-level='pmd\.event\.cnxk\.timer,8'               |
    +---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device causes back pressure to be asserted only on the first
+ethernet device.
+Back pressure is therefore automatically disabled when the same mempool is
+used for all the ethernet devices connected to the event device; applications
+can override this with the ``force_rx_bp=1`` device argument.
+Using a unique mempool per ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
 
 * Added net/cnxk driver which provides the support for the integrated ethernet
   device.
+* **Added support for Marvell CN10K, CN9K, event Rx adapter.** + + * Added Rx adapter support for event/cnxk when the ethernet device requested is + net/cnxk. + Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb6902795..76613fe84 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f..f17eba416 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df44..9dc0c88a6 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 8a5c839e5..cb1ce4b6f 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -29,6 +29,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76..2060c8fe8 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } +static int 
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c6..5dbae275b 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd..c7250bf9e 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); 
} while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417ea..072800c24 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb..f5a440146 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 
+5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a7..cfd7fb971 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62..b65d725f5 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const 
struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c1..3b7ecb375 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, + i, queue_conf); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, (uint16_t)rx_queue_id, port, + &queue_conf->ev, + 
!!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a..eda562f5b 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
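
For context, the control path added above is exercised through the generic rte_event_eth_rx_adapter API. Below is a minimal application-side sketch, not part of this patch: the adapter id, event queue id, scheduling type and port configuration are placeholders, and error handling is trimmed.

#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
setup_rx_adapter(uint8_t evdev_id, uint16_t eth_port,
		 struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	/* cnxk reports RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT for
	 * net_cn9k/net_cn10k ports, so no service core is needed; the
	 * SSO pushes packets into the event device in hardware.
	 */
	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port, &caps);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_create(0, evdev_id, port_conf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = 0;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	/* rx_queue_id of -1 maps every Rx queue of the port, i.e. the
	 * N:1 Rx queue to event queue mapping listed in cnxk.rst.
	 */
	rc = rte_event_eth_rx_adapter_queue_add(0, eth_port, -1, &qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(0);
}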
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe8..ba7d95fff 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275b..c71aa3732 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e..b724083ca 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 000000000..36ec454cc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 000000000..29ecc551c --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 000000000..c8524a27b --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c24..e386cb784 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98d..538bc4b0b 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a440146..c01c00e1d 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 000000000..51ccaf4ec --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 000000000..4e2801459 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 000000000..9713d1ef0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 000000000..709fa2d9e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 000000000..d50e1cf83 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 000000000..a0508fdf0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f5..9d5d2d033 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b..c5c1c0ee8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
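The R() expansions above all share one dequeue-with-timeout loop: poll once, then keep polling until either work arrives or timeout_ticks iterations elapse. As a minimal standalone sketch of that pattern (a hypothetical poll callback stands in for cn9k_sso_hws_get_work(), and the swtag_req fast path is omitted):

#include <stdint.h>

typedef uint16_t (*poll_fn_t)(void *ctx, void *ev);

static uint16_t
deq_tmo_sketch(void *ctx, void *ev, uint64_t timeout_ticks, poll_fn_t poll)
{
	uint64_t iter;
	uint16_t ret;

	ret = poll(ctx, ev);		/* first attempt is iteration 0 */
	for (iter = 1; iter < timeout_ticks && ret == 0; iter++)
		ret = poll(ctx, ev);	/* retry until work or timeout */
	return ret;
}

The burst variants simply ignore nb_events and forward to the single-event form, since the SSO hands out one event per GWS operation.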
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 4 +- doc/guides/rel_notes/release_21_08.rst | 6 +- drivers/common/cnxk/roc_nix.h | 1 + drivers/common/cnxk/roc_nix_queue.c | 8 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 22 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++ 8 files changed, 359 insertions(+), 9 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index b7e82c127..6fdccc2ab 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are: - HW managed packets enqueued from ethdev to eventdev exposed through event eth RX adapter. - N:1 ethernet device Rx queue to Event queue mapping. -- Full Rx offload support defined through ethdev queue configuration. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 3892c8017..80ff93269 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -60,10 +60,10 @@ New Features * Added net/cnxk driver which provides the support for the integrated ethernet device. -* **Added support for Marvell CN10K, CN9K, event Rx adapter.** +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** - * Added Rx adapter support for event/cnxk when the ethernet device requested is - net/cnxk. + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. 
Removed Items diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 76613fe84..822c1900e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -200,6 +200,7 @@ struct roc_nix_sq { uint64_t aura_handle; int16_t nb_sqb_bufs_adj; uint16_t nb_sqb_bufs; + uint16_t aura_sqb_bufs; plt_iova_t io_addr; void *lmt_addr; void *sqe_mem; diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c index 0604e7a18..7e2f86eca 100644 --- a/drivers/common/cnxk/roc_nix_queue.c +++ b/drivers/common/cnxk/roc_nix_queue.c @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) aura.fc_ena = 1; aura.fc_addr = (uint64_t)sq->fc; aura.fc_hyst_bits = 0; /* Store count on all updates */ - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura, + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura, &pool); if (rc) goto fail; - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz); + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz); if (sq->sqe_mem == NULL) { rc = NIX_ERR_NO_MEM; goto nomem; @@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) /* Fill the initial buffers */ iova = (uint64_t)sq->sqe_mem; - for (count = 0; count < nb_sqb_bufs; count++) { + for (count = 0; count < NIX_MAX_SQB; count++) { roc_npa_aura_op_free(sq->aura_handle, 0, iova); iova += blk_sz; } roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova); + roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs); + sq->aura_sqb_bufs = NIX_MAX_SQB; return rc; nomem: diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff..8a9b04a3d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ 
cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784..21f80323d 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + 
memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static void +cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id, + bool ena) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cn9k_eth_txq *txq; + struct roc_nix_sq *sq; + int i; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cn9k_sso_txq_fc_update(eth_dev, i, ena); + } else { + uint16_t sq_limit; + + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq_limit = + ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) : + sq->nb_sqb_bufs; + txq->nb_sqb_bufs_adj = + sq_limit - + RTE_ALIGN_MUL_CEIL(sq_limit, + (1ULL << txq->sqes_per_sqb_log2)) / + (1ULL << txq->sqes_per_sqb_log2); + txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100; + } +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true); + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false); + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d033..24e1be6a9 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include 
<rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -34,6 +35,7 @@ #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) #define CNXK_SSO_WQE_SG_PTR (9) +#define CNXK_SSO_SQB_LIMIT (0x180) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) @@ -86,9 +88,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +120,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +148,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 3b7ecb375..502da272d 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + return roc_npa_aura_limit_modify( + sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs)); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct 
rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i); + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + } + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i); + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
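From the application side, the internal-port Tx adapter added here is driven entirely through the generic rte_event_eth_tx_adapter API; a queue id of -1 lands in cnxk_sso_tx_adapter_queue_add() above, which loops over all Tx queues of the port. A sketch, assuming adapter id 0 (the ids and the single-queue worker are illustrative, not part of the patch):

#include <rte_event_eth_tx_adapter.h>

static int
setup_tx_adapter(uint8_t evdev_id, uint16_t eth_port,
		 struct rte_event_port_conf *port_conf)
{
	int rc;

	rc = rte_event_eth_tx_adapter_create(0, evdev_id, port_conf);
	if (rc < 0)
		return rc;
	/* -1 adds every Tx queue of the port in one call */
	rc = rte_event_eth_tx_adapter_queue_add(0, eth_port, -1);
	if (rc < 0)
		return rc;
	return rte_event_eth_tx_adapter_start(0);
}

/* Worker: tag the mbuf with its Tx queue, then hand the event to the
 * adapter's enqueue path. */
static inline void
worker_tx_one(uint8_t evdev_id, uint8_t ev_port, struct rte_event *ev)
{
	rte_event_eth_tx_adapter_txq_set(ev->mbuf, 0);
	rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, ev, 1, 0);
}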
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cnxk_worker.h | 27 +++--- drivers/event/cnxk/meson.build | 6 ++ 12 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3d..e462f770c 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083ca..3c90c8500 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ @@ 
-251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 000000000..f9968ac0d --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 000000000..a24fc42e5 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 21f80323d..a69edff19 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & 
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1d..3f9751211 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline void +cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq) +{ + while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) + << (txq)->sqes_per_sqb_log2)) + ; +} + +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 000000000..92e2981f0 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 000000000..dfb574cf9 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 000000000..3df649c0c --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 000000000..0efe29113 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae16..7891b749d 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -79,21 +79,20 @@ static __rte_always_inline void cnxk_sso_hws_head_wait(uintptr_t tag_op) { #ifdef RTE_ARCH_ARM64 - uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); + uint64_t tag; + + asm volatile(" ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); #else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) + /* Wait for the HEAD to be set */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) ; #endif } diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8..13e0634e8 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 
'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
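The six-dimensional sso_hws_tx_adptr_enq tables above are the same template-specialization trick used on the Rx side: every NIX_TX_FASTPATH_MODES entry becomes a dedicated function, and at configure time one of them is selected by folding each offload flag to a 0/1 index. The idiom, reduced to two hypothetical flags so it is runnable on its own:

#include <stdint.h>
#include <stdio.h>

#define FLAG_CSUM (1U << 0)
#define FLAG_VLAN (1U << 1)

static void enq_plain(void)     { puts("no offloads"); }
static void enq_csum(void)      { puts("csum"); }
static void enq_vlan(void)      { puts("vlan"); }
static void enq_vlan_csum(void) { puts("vlan+csum"); }

int
main(void)
{
	/* [VLAN][CSUM], mirroring [f5][f4][f3][f2][f1][f0] in the patch */
	void (*const tbl[2][2])(void) = {
		{ enq_plain, enq_csum },
		{ enq_vlan, enq_vlan_csum },
	};
	uint32_t offloads = FLAG_CSUM | FLAG_VLAN;

	/* !! collapses any set bit to exactly 1, giving a 0/1 index */
	tbl[!!(offloads & FLAG_VLAN)][!!(offloads & FLAG_CSUM)]();
	return 0;
}

Keeping each specialization in its own translation unit (the meson.build additions) presumably also keeps per-file compile times manageable, rather than instantiating every variant in one worker file.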
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 2 + drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 5 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 6fdccc2ab..0297cd3d5 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are: - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability while maintaining receive packet order. - Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c..e85fa4785 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 24e1be6a9..fc49b88d6 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -97,6 +97,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 502da272d..baf2f2aa6 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead0390..2528b3cda 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev 
*eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
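One detail worth calling out in cnxk_sso_rx_adapter_vwqe_enable(): the RQ tag template packs both the originating ethdev port and the RTE_EVENT_TYPE_ETHDEV_VECTOR event type into the upper tag bits, so the fastpath can recognise vector work purely from the tag. A standalone sketch of that encoding, with the bit positions taken from the patch (RTE_EVENT_TYPE_ETHDEV_VECTOR is 0x8 in this release):

#include <stdint.h>

#define EV_TYPE_ETHDEV_VECTOR 0x8 /* RTE_EVENT_TYPE_VECTOR | ETHDEV */

static uint32_t
vwqe_tag_mask(uint16_t port_id)
{
	uint32_t tag;

	/* port[3:0] -> tag[23:20] */
	tag = (uint32_t)(port_id & 0xF) << 20;
	/* port[7:4] -> tag[27:24], event type -> tag[31:28] */
	tag |= (uint32_t)(((port_id >> 4) & 0xF) |
			  (EV_TYPE_ETHDEV_VECTOR << 4)) << 24;
	return tag;
}

This is what the next patch demultiplexes on in cn10k_sso_hws_get_work(): CNXK_EVENT_TYPE_FROM_TAG() recovers the event type and CNXK_SUB_EVENT_FROM_TAG() recovers the port.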
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/event/cnxk/cn10k_worker.h | 56 +++++++ drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++---------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 5 files changed, 179 insertions(+), 85 deletions(-) diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 80ff93269..11ccc9bcb 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,6 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. + * Add support for event vectorization for Rx adapter. Removed Items diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c8500..7a48a6b17 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e..a506a867c 
100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f..166735ad5 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c8..1f44ddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
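To make the two addressing modes of CQE_PTR_OFF() concrete, here is a minimal standalone sketch. The macro is copied from the cn10k_rx.h hunk above; the main() harness, buffer names and sample offsets are illustrative only. Because the flags argument is a compile-time constant in every fast-path specialization, the ternary folds away: the normal path strides over a contiguous completion ring with CQE_SZ(), while the VWQE path treats the base as an array of WQE pointers delivered in the event vector.

#include <stdint.h>
#include <stdio.h>

/* Copied from the cn10k_rx.h hunk above. */
#define NIX_RX_VWQE_F	     (1u << 14)
#define CNXK_NIX_CQ_ENTRY_SZ 128
#define CQE_SZ(x)	     ((x) * CNXK_NIX_CQ_ENTRY_SZ)

#define CQE_PTR_OFF(b, i, o, f)                                                \
	(((f) & NIX_RX_VWQE_F) ?                                               \
		 (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) :       \
		 (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))

int main(void)
{
	/* Normal mode: CQEs sit back to back in a ring. */
	uint8_t cq_ring[4 * CNXK_NIX_CQ_ENTRY_SZ];
	/* VWQE mode: the base is an array of WQE pointers from the vector. */
	uint8_t wqe0[CNXK_NIX_CQ_ENTRY_SZ], wqe1[CNXK_NIX_CQ_ENTRY_SZ];
	uint64_t vec[2] = {(uintptr_t)wqe0, (uintptr_t)wqe1};

	/* Same macro, two addressing schemes. */
	printf("ring entry 1, offset 64: %p\n",
	       (void *)CQE_PTR_OFF(cq_ring, 1, 64, 0));
	printf("vwqe entry 1, offset 64: %p\n",
	       (void *)CQE_PTR_OFF(vec, 1, 64, NIX_RX_VWQE_F));
	return 0;
}

Note also the VWQE-mode data_off of vdupq_n_u64(0x80ULL): subtracting 0x80, i.e. sizeof(struct rte_mbuf), from each hardware-written WQE pointer appears to be what converts it into the corresponding mbuf pointer, mirroring the "- sizeof(struct rte_mbuf)" adjustment in the worker get_work path later in this series.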
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath, integrate event vector Tx routine into Tx burst. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- doc/guides/eventdevs/cnxk.rst | 1 + doc/guides/rel_notes/release_21_08.rst | 2 +- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++---- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 12 files changed, 167 insertions(+), 53 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 0297cd3d5..53560d383 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are: - Full Rx/Tx offload support defined through ethdev queue configuration. - HW managed event vectorization on CN10K for packets enqueued from ethdev to eventdev configurable per each Rx queue in Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 11ccc9bcb..9e49cb27d 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -64,7 +64,7 @@ New Features * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. - * Add support for event vectorization for Rx adapter. + * Add support for event vectorization for Rx/Tx adapter. 
Removed Items diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8..316c6ccd5 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785..6f37c5bd2 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17..9cc099206 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + 
(cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 3f9751211..cc1e14195 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 7891b749d..9f9ceab8a 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - 
uint64_t tag; - - asm volatile(" ldr %[tag], [%[tag_op]] \n" - " tbnz %[tag], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[tag], [%[tag_op]] \n" - " tbz %[tag], 35, rty%= \n" - "done%=: \n" - : [tag] "=&r"(tag) - : [tag_op] "r"(tag_op)); -#else - /* Wait for the HEAD to be set */ - while (!(plt_read64(tag_op) & BIT_ULL(35))) - ; -#endif -} - #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59..0e1276c60 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index eb148b8e7..f75cae07a 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = (uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64_t data[2]; } wd; - 
NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f675472..4ea4c8a4e 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e373750..a0350496a 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dba..7f98f79b9 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
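cn10k_sso_hws_event_tx() above reads the whole first word of the event vector as a single uint64_t ("meta") and branches on bit 31. Below is a sketch of that decode, assuming the rte_event_vector layout this series builds on (nb_elem in bits [15:0], the attribute-valid flag at bit 31, port and queue in the upper 32 bits, as the shifts in the patch suggest); the VEC_* macro names are illustrative, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative field extractors for "meta = *(uint64_t *)ev->vec". */
#define VEC_NB_ELEM(m)	  ((m) & 0xFFFF)
#define VEC_ATTR_VALID(m) (!!((m) & (1ULL << 31)))
#define VEC_PORT(m)	  (((m) >> 32) & 0xFFFF)
#define VEC_QUEUE(m)	  (((m) >> 48) & 0xFFFF)

int main(void)
{
	/* Attribute-valid bit set: port 3, queue 1, 32 mbufs in vector. */
	uint64_t meta = 32ULL | (1ULL << 31) | (3ULL << 32) | (1ULL << 48);

	if (VEC_ATTR_VALID(meta))
		/* One (port, queue) for the whole vector: hand it to
		 * cn10k_nix_xmit_pkts_vector() in a single call.
		 */
		printf("single txq: port=%u queue=%u nb=%u\n",
		       (unsigned)VEC_PORT(meta), (unsigned)VEC_QUEUE(meta),
		       (unsigned)VEC_NB_ELEM(meta));
	else
		/* Mixed destinations: cn10k_sso_vwqe_split_tx() walks the
		 * mbufs four at a time and re-resolves the txq per group.
		 */
		printf("split path: nb=%u\n", (unsigned)VEC_NB_ELEM(meta));
	return 0;
}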
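The roc_sso_hws_head_wait() helper moved into roc_sso.h is the ordering guarantee behind lockfree Tx: before issuing the LMTST, the workslot polls bit 35 of SSOW_LF_GWS_TAG until it holds HEAD of the tag chain, so ordered flows still transmit in sequence. The AArch64 version parks the core with SEVL/WFE between polls; a portable C rendering of the same loop, matching the non-ARM fallback in the patch (the function name is hypothetical, to avoid clashing with the real helper):

#include <stdint.h>

static inline void
sso_hws_head_wait_sketch(const volatile uint64_t *tag_op)
{
	/* Spin until bit 35 (HEAD) of the GWS tag register is set. */
	while (!(*tag_op & (1ULL << 35)))
		;
}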
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize cn10k workslot fastpath structure to fit in 64B cacheline size. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v8 Changes: - Fix incorrect cq_w1 offset. - Move doc changes to 1st patch. v7 Changes: - Set correct limits for SQB aura. v6 Changes: - More code cleanup. - Fix incorrect SQB configuration and missing fc check. v5 Changes: - Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset. v4 Changes: - Split patches for easier merge. v3 Changes: - Spell check. doc/guides/eventdevs/cnxk.rst | 33 +++++ doc/guides/rel_notes/release_21_08.rst | 5 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 527 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800cc..53560d3830 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,15 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev configurable per each Rx queue in Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +102,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when same mempool is used across ethernet device + connected to event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +178,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to +event device would cause back pressure to be asserted only on the first +ethernet device. +Back pressure is automatically disabled when using same mempool for all the +ethernet devices connected to event device to override this applications can +use `force_rx_bp=1` device arguments. +Using unique mempool per each ethernet device is recommended when they are +connected to event device. 
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 6a902ef9ac..de5ce86c04 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -117,6 +117,11 @@ New Features The experimental PMD power management API now supports managing multiple Ethernet Rx queues per lcore. +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. + * Add support for event vectorization for Rx/Tx adapter. + Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb69027956..76613fe84e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f8..f17eba4169 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df442..9dc0c88a6f 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index e3af48c02e..8ea3e9f439 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -83,6 +83,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76c..2060c8fe84 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } 
+static int +cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c64..5dbae275ba 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd5..c7250bf9e7 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + 
SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417eab..072800c243 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb6..f5a4401465 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ 
b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a79..cfd7fb971c 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62a..b65d725f55 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct 
rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c14..3b7ecb375a 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, + i, queue_conf); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, 
(uint16_t)rx_queue_id, port, + &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a9..eda562f5b5 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
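In cnxk_sso_rxq_enable() above, the per-RQ tag_mask is what lets the dequeue path recognize adapter traffic: the 8-bit ethdev port is folded into bits [27:20] (the event's sub_event_type field) and RTE_EVENT_TYPE_ETHDEV lands in bits [31:28] (event_type), while the hardware ORs the 20-bit flow tag into bits [19:0]. A minimal sketch of the composition and decode, assuming the standard rte_event bit layout; the port value is illustrative:

#include <stdint.h>
#include <stdio.h>

#define RTE_EVENT_TYPE_ETHDEV 0x0 /* value from rte_eventdev.h */

int main(void)
{
	uint16_t port_id = 0xA5; /* hypothetical ethdev port id */
	uint32_t tag_mask;

	/* Same composition as cnxk_sso_rxq_enable(). */
	tag_mask = (port_id & 0xF) << 20;
	tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
		    << 24;

	/* Decode against rte_event: flow_id [19:0],
	 * sub_event_type [27:20], event_type [31:28].
	 */
	printf("sub_event_type (port) = 0x%x\n",
	       (unsigned)((tag_mask >> 20) & 0xFF));
	printf("event_type            = 0x%x\n",
	       (unsigned)((tag_mask >> 28) & 0xF));
	return 0;
}

This is why cn10k_sso_hws_get_work() in the next patch can detect RTE_EVENT_TYPE_ETHDEV straight from the returned tag and recover the port with CNXK_SUB_EVENT_FROM_TAG() before converting the WQE to an mbuf.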
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe84..ba7d95fff7 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275ba..c71aa37327 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e7..b724083caa 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 0000000000..36ec454ccc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 0000000000..29ecc551cf --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 0000000000..c8524a27bd --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c243..e386cb784a 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98dd..538bc4b0b3 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a4401465..c01c00e1da 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 0000000000..51ccaf4ec4 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 0000000000..4e2801459b --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 0000000000..9713d1ef00 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 0000000000..709fa2d9ef --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 0000000000..d50e1cf83f --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 0000000000..a0508fdf0d --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f55..9d5d2d0339 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b5..c5c1c0ee8e 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
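
As a usage illustration (a minimal sketch, not part of the patch set): whichever template-generated dequeue variant the lookup tables above select, applications consume packets through the generic Rx adapter API. The eventdev id, adapter id, queue ids and port configuration values below are assumptions for illustration only.

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_mbuf.h>

/* Hypothetical application handler, stubbed for completeness. */
static void
handle_pkt(struct rte_mbuf *m)
{
	rte_pktmbuf_free(m);
}

/* Bind ethdev Rx queue 0 to event queue 0. On cnxk the adapter runs
 * fully in hardware (INTERNAL_PORT capability), so no service core is
 * consumed. All ids and thresholds here are illustrative assumptions.
 */
static int
rx_adapter_setup(uint8_t evdev_id, uint16_t eth_port_id)
{
	struct rte_event_port_conf port_conf = {
		.new_event_threshold = 4096,
		.dequeue_depth = 1,
		.enqueue_depth = 1,
	};
	struct rte_event_eth_rx_adapter_queue_conf queue_conf = {
		.ev.queue_id = 0,
		.ev.sched_type = RTE_SCHED_TYPE_ATOMIC,
	};
	int rc;

	rc = rte_event_eth_rx_adapter_create(0, evdev_id, &port_conf);
	if (rc)
		return rc;
	rc = rte_event_eth_rx_adapter_queue_add(0, eth_port_id, 0,
						&queue_conf);
	if (rc)
		return rc;
	return rte_event_eth_rx_adapter_start(0);
}

/* Worker loop: packets arrive as RTE_EVENT_TYPE_ETHDEV events whose
 * u64 already holds the mbuf produced by cn{9,10}k_wqe_to_mbuf() in
 * the dequeue fastpath added by this patch.
 */
static void
worker_poll(uint8_t evdev_id, uint8_t ev_port)
{
	struct rte_event ev;

	if (rte_event_dequeue_burst(evdev_id, ev_port, &ev, 1, 0) &&
	    ev.event_type == RTE_EVENT_TYPE_ETHDEV)
		handle_pkt(ev.mbuf);
}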
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> --- drivers/common/cnxk/roc_nix.h | 1 + drivers/common/cnxk/roc_nix_queue.c | 8 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 22 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++ 6 files changed, 353 insertions(+), 5 deletions(-) diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 76613fe84e..822c1900e2 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -200,6 +200,7 @@ struct roc_nix_sq { uint64_t aura_handle; int16_t nb_sqb_bufs_adj; uint16_t nb_sqb_bufs; + uint16_t aura_sqb_bufs; plt_iova_t io_addr; void *lmt_addr; void *sqe_mem; diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c index 0604e7a18e..7e2f86eca7 100644 --- a/drivers/common/cnxk/roc_nix_queue.c +++ b/drivers/common/cnxk/roc_nix_queue.c @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) aura.fc_ena = 1; aura.fc_addr = (uint64_t)sq->fc; aura.fc_hyst_bits = 0; /* Store count on all updates */ - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura, + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura, &pool); if (rc) goto fail; - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz); + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz); if (sq->sqe_mem == NULL) { rc = NIX_ERR_NO_MEM; goto nomem; @@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) /* Fill the initial buffers */ iova = (uint64_t)sq->sqe_mem; - for (count = 0; count < nb_sqb_bufs; count++) { + for (count = 0; count < NIX_MAX_SQB; count++) { roc_npa_aura_op_free(sq->aura_handle, 0, iova); iova += blk_sz; } roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova); + roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs); + sq->aura_sqb_bufs = NIX_MAX_SQB; return rc; nomem: diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff7..8a9b04a3db 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, 
sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784a..21f80323d9 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 
1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static void +cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id, + bool ena) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cn9k_eth_txq *txq; + struct roc_nix_sq *sq; + int i; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cn9k_sso_txq_fc_update(eth_dev, i, ena); + } else { + uint16_t sq_limit; + + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq_limit = + ena ? 
RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) : + sq->nb_sqb_bufs; + txq->nb_sqb_bufs_adj = + sq_limit - + RTE_ALIGN_MUL_CEIL(sq_limit, + (1ULL << txq->sqes_per_sqb_log2)) / + (1ULL << txq->sqes_per_sqb_log2); + txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100; + } +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true); + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false); + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d0339..24e1be6a97 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -34,6 +35,7 @@ #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) #define CNXK_SSO_WQE_SG_PTR (9) +#define CNXK_SSO_SQB_LIMIT (0x180) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) @@ -86,9 +88,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +120,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +148,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + 
uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 3b7ecb375a..502da272d8 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + return roc_npa_aura_limit_modify( + sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs)); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i); + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + } + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i); + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
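For reference, the txq pointer table that cnxk_sso_updt_tx_queue_data() maintains above is one flat allocation of (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT slots, grown whenever a queue is added on a port id beyond the current maximum and indexed by casting to a two-dimensional array pointer, so the fast path can fetch a txq with two loads and no branching. A minimal standalone sketch of the same scheme, with plain realloc() standing in for rte_realloc_socket() and MAX_QUEUES for RTE_MAX_QUEUES_PER_PORT:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_QUEUES 64 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

struct txq_table {
	uint64_t *data;   /* flat: (max_port + 1) * MAX_QUEUES slots */
	uint16_t max_port;
};

static int
txq_table_set(struct txq_table *t, uint16_t port, uint16_t queue, void *txq)
{
	if (t->data == NULL || port > t->max_port) {
		uint16_t max_port = port > t->max_port ? port : t->max_port;
		uint64_t *data;

		/* Grow the flat table; like realloc in the patch, the
		 * newly added slots are left uninitialized.
		 */
		data = realloc(t->data,
			       sizeof(uint64_t) * (max_port + 1) * MAX_QUEUES);
		if (data == NULL)
			return -1;
		t->data = data;
		t->max_port = max_port;
	}
	/* Same cast the driver uses to index the flat array as 2-D. */
	((uint64_t(*)[MAX_QUEUES])t->data)[port][queue] =
		(uint64_t)(uintptr_t)txq;
	return 0;
}

int
main(void)
{
	struct txq_table t = {NULL, 0};
	int dummy_txq;

	txq_table_set(&t, 3, 7, &dummy_txq);
	printf("txq[3][7] = %p\n",
	       (void *)(uintptr_t)((uint64_t(*)[MAX_QUEUES])t.data)[3][7]);
	free(t.data);
	return 0;
}

Keeping the table flat means the worker-side enqueue can index it straight from the mbuf's port and the queue returned by rte_event_eth_tx_adapter_txq_get(), as the fastpath patches that follow rely on.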
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cnxk_worker.h | 27 +++--- drivers/event/cnxk/meson.build | 6 ++ 12 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3db..e462f770c5 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083caa..3c90c85009 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ 
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 0000000000..f9968ac0d0 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..a24fc42e5a --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 21f80323d9..a69edff195 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads 
& + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1da..3f9751211a 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline void +cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq) +{ + while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) + << (txq)->sqes_per_sqb_log2)) + ; +} + +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 0000000000..92e2981f02 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 0000000000..dfb574cf95 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 0000000000..3df649c0c8 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..0efe29113e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae162..7891b749df 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -79,21 +79,20 @@ static __rte_always_inline void cnxk_sso_hws_head_wait(uintptr_t tag_op) { #ifdef RTE_ARCH_ARM64 - uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); + uint64_t tag; + + asm volatile(" ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); #else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) + /* Wait for the HEAD to be set */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) ; #endif } diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8e..13e0634e86 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 
'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
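The fastpath tables above follow the usual cnxk X-macro pattern: NIX_TX_FASTPATH_MODES expands one T() entry per combination of offload flags, each entry instantiates a specialized enqueue function in the new per-mode source files, and configure time picks one by indexing an N-dimensional table with !!(tx_offloads & FLAG) terms. A simplified two-flag illustration of the pattern (MODES, FLAG_*, and the tx_* names are made up for this sketch, not the driver's):

#include <stdint.h>
#include <stdio.h>

#define FLAG_CSUM (1U << 0)
#define FLAG_TSO  (1U << 1)

/* T(name, tso?, csum?) -- one entry per flag combination. */
#define MODES \
	T(none,     0, 0) \
	T(csum,     0, 1) \
	T(tso,      1, 0) \
	T(tso_csum, 1, 1)

/* Instantiate one specialized function per mode. In the driver each
 * body calls a common inline with a constant 'flags', so the untaken
 * offload branches compile away.
 */
#define T(name, f1, f0) \
	static uint16_t tx_##name(void *pkts, uint16_t n) \
	{ \
		(void)pkts; \
		printf("tx_" #name ": %u pkts\n", (unsigned)n); \
		return n; \
	}
MODES
#undef T

typedef uint16_t (*tx_fn)(void *, uint16_t);

int
main(void)
{
	/* Re-expand the same list to fill a [tso][csum]-indexed table. */
	const tx_fn tbl[2][2] = {
#define T(name, f1, f0) [f1][f0] = tx_##name,
		MODES
#undef T
	};
	uint32_t offloads = FLAG_TSO | FLAG_CSUM;
	tx_fn fn = tbl[!!(offloads & FLAG_TSO)][!!(offloads & FLAG_CSUM)];

	return fn(NULL, 4) == 4 ? 0 : 1;
}

Because 'flags' reaches each specialization as a compile-time constant, the driver pays for 2^6 = 64 variants per [2][2][2][2][2][2] table in exchange for branch-free workers.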
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 4 files changed, 133 insertions(+), 2 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c5..e85fa4785d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, 
config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 24e1be6a97..fc49b88d6f 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -97,6 +97,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 502da272d8..baf2f2aa6b 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead03905..2528b3cdaa 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
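Two details of cnxk_sso_rx_adapter_vwqe_enable() above are easy to miss: the wait timeout is programmed in hardware ticks of (vwqe_interval + 1) * 100 ns (the same base the vector-limits callback reports), and the Rx queue tag mask spreads the port id across two fields so the aggregated event carries RTE_EVENT_TYPE_ETHDEV_VECTOR together with the originating port. A standalone recomputation of both, assuming 0x8 for the RTE_EVENT_TYPE_ETHDEV_VECTOR value and a sample interval in place of roc_nix_get_vwqe_interval():

#include <stdint.h>
#include <stdio.h>

#define ETHDEV_VECTOR 0x8 /* assumed RTE_EVENT_TYPE_ETHDEV_VECTOR value */

static uint64_t
vwqe_wait_ticks(uint64_t tmo_ns, uint64_t vwqe_interval)
{
	/* One HW tick is (interval + 1) * 100 ns, per the limits code. */
	return tmo_ns / ((vwqe_interval + 1) * 100);
}

static uint32_t
vwqe_tag_mask(uint16_t port_id)
{
	/* Low nibble of the port lands in bits 20..23; the high nibble
	 * and the event type share bits 24..31, as in the patch.
	 */
	uint32_t mask = (uint32_t)(port_id & 0xF) << 20;

	mask |= (uint32_t)(((port_id >> 4) & 0xF) | (ETHDEV_VECTOR << 4)) << 24;
	return mask;
}

int
main(void)
{
	printf("wait ticks for 10us @ interval 0: %lu\n",
	       (unsigned long)vwqe_wait_ticks(10000, 0));
	printf("tag mask for port 0x25: 0x%08x\n", vwqe_tag_mask(0x25));
	return 0;
}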
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_worker.h | 56 ++++++++ drivers/net/cnxk/cn10k_rx.h | 200 ++++++++++++++++----------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 4 files changed, 178 insertions(+), 85 deletions(-) diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c85009..7a48a6b17d 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e7..4c5288b2cc 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & 
NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f9..166735ad59 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c82..1f44dddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
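The shape of cn10k_process_vwqe() above is worth calling out: the vector's element array is rewritten in place from CQE pointers to mbuf pointers, the first RTE_ALIGN_FLOOR(nb_elem, NIX_DESCS_PER_LOOP) entries go through the NEON Rx routine, and the unaligned tail is converted one CQE at a time with cn10k_nix_cqe_to_mbuf(). A reduced sketch of that split, with process4()/process1() as stand-ins for the real conversion routines:

#include <stdint.h>
#include <stdio.h>

#define DESCS_PER_LOOP 4 /* stand-in for NIX_DESCS_PER_LOOP */
#define ALIGN_FLOOR(v, a) ((v) & ~((a) - 1))

static void
process4(const uint32_t *in)
{
	/* Vectorized path: handles DESCS_PER_LOOP descriptors at once. */
	printf("vec: %u..%u\n", (unsigned)in[0], (unsigned)in[3]);
}

static void
process1(uint32_t in)
{
	/* Scalar path for the unaligned remainder. */
	printf("scalar: %u\n", (unsigned)in);
}

static void
process_vwqe(const uint32_t *elems, uint16_t nb_elem)
{
	uint16_t nb_vec = ALIGN_FLOOR(nb_elem, DESCS_PER_LOOP);
	uint16_t i;

	for (i = 0; i < nb_vec; i += DESCS_PER_LOOP)
		process4(&elems[i]);
	for (; i < nb_elem; i++)
		process1(elems[i]);
}

int
main(void)
{
	uint32_t elems[7] = {0, 1, 2, 3, 4, 5, 6};

	process_vwqe(elems, 7); /* 4 vectorized + 3 scalar */
	return 0;
}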
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath, integrate event vector Tx routine into Tx burst. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 10 files changed, 165 insertions(+), 52 deletions(-) diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8a..316c6ccd59 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __rte_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785d..6f37c5bd23 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17d..9cc0992063 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ 
port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + (cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 3f9751211a..cc1e141957 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } 
else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 7891b749df..9f9ceab8a1 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - uint64_t tag; - - asm volatile(" ldr %[tag], [%[tag_op]] \n" - " tbnz %[tag], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[tag], [%[tag_op]] \n" - " tbz %[tag], 35, rty%= \n" - "done%=: \n" - : [tag] "=&r"(tag) - : [tag_op] "r"(tag_op)); -#else - /* Wait for the HEAD to be set */ - while (!(plt_read64(tag_op) & BIT_ULL(35))) - ; -#endif -} - #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59a..0e1276c60b 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index eb148b8e77..f75cae07ae 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = 
(uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f6754722..4ea4c8a4e5 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e3737501..a0350496ab 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dbad..7f98f79b97 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ 
return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
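A note on the NIX_TX_VWQE_F calling contract introduced above, as a minimal sketch rather than driver code (txq, tx_pkts, nb_pkts, cmd, flags and ws are assumed to be set up as elsewhere in this series): the ethdev Tx burst entry points pass 0 for the new base argument, so the VWQE branch is compiled out, while the event Tx path passes the workslot GWS tag address and sets NIX_TX_VWQE_F so that roc_sso_hws_head_wait() runs immediately before the LMTST:

    /* Sketch only: the two call styles for the new base/flags pair. */
    uint16_t sent;

    /* Plain ethdev fastpath: no SSO ordering needed, base unused. */
    sent = cn10k_nix_xmit_pkts(txq, tx_pkts, nb_pkts, cmd, 0, flags);

    /* Event Tx adapter path: poll bit 35 of the GWS tag (HEAD) before
     * triggering the LMTST so per-flow Tx order is preserved.
     */
    sent = cn10k_nix_xmit_pkts_vector(txq, tx_pkts, nb_pkts, cmd,
                                      ws->tx_base + SSOW_LF_GWS_TAG,
                                      flags | NIX_TX_VWQE_F);

Deferring the head wait to just before submission keeps the descriptor preparation work out of the ordered critical section.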
On Mon, Jul 12, 2021 at 5:01 AM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add Tx event vector fastpath, integrate event vector Tx routine > into Tx burst. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> * **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested is net/cnxk. Documentation build issue here [2757/2784] Generating html_guides with a custom command FAILED: doc/guides/html /usr/bin/python ../buildtools/call-sphinx-build.py /usr/bin/sphinx-build 21.08.0-rc1 /export/dpdk-next-eventdev/doc/guides /export/dpdk-next-eventdev/build/doc/guides -W Warning, treated as error: /export/dpdk-next-eventdev/doc/guides/rel_notes/release_21_08.rst:122:Unexpected indentation. > --- > drivers/common/cnxk/roc_sso.h | 23 ++++++ > drivers/event/cnxk/cn10k_eventdev.c | 3 +- > drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++-- > drivers/event/cnxk/cn9k_worker.h | 4 +- > drivers/event/cnxk/cnxk_worker.h | 22 ------ > drivers/net/cnxk/cn10k_tx.c | 2 +- > drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----- > drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- > drivers/net/cnxk/cn10k_tx_vec.c | 2 +- > drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- > 10 files changed, 165 insertions(+), 52 deletions(-) > > diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h > index a6030e7d8a..316c6ccd59 100644 > --- a/drivers/common/cnxk/roc_sso.h > +++ b/drivers/common/cnxk/roc_sso.h > @@ -44,6 +44,29 @@ struct roc_sso { > uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; > } __plt_cache_aligned; > > +static __rte_always_inline void > +roc_sso_hws_head_wait(uintptr_t tag_op) > +{ > +#ifdef RTE_ARCH_ARM64 > + uint64_t tag; > + > + asm volatile(PLT_CPU_FEATURE_PREAMBLE > + " ldr %[tag], [%[tag_op]] \n" > + " tbnz %[tag], 35, done%= \n" > + " sevl \n" > + "rty%=: wfe \n" > + " ldr %[tag], [%[tag_op]] \n" > + " tbz %[tag], 35, rty%= \n" > + "done%=: \n" > + : [tag] "=&r"(tag) > + : [tag_op] "r"(tag_op)); > +#else > + /* Wait for the SWTAG/SWTAG_FULL operation */ > + while (!(plt_read64(tag_op) & BIT_ULL(35))) > + ; > +#endif > +} > + > /* SSO device initialization */ > int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); > int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); > diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c > index e85fa4785d..6f37c5bd23 100644 > --- a/drivers/event/cnxk/cn10k_eventdev.c > +++ b/drivers/event/cnxk/cn10k_eventdev.c > @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, > if (ret) > *caps = 0; > else > - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; > + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | > + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; > > return 0; > } > diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h > index 7a48a6b17d..9cc0992063 100644 > --- a/drivers/event/cnxk/cn10k_worker.h > +++ b/drivers/event/cnxk/cn10k_worker.h > @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, > NIX_RX_FASTPATH_MODES > #undef R > > -static __rte_always_inline const struct cn10k_eth_txq * > +static __rte_always_inline struct cn10k_eth_txq * > cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, > const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) > { > - return (const struct cn10k_eth_txq *) > + return (struct cn10k_eth_txq *) > txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; > } 
> > +static __rte_always_inline void > +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, > + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, > + uint8_t sched_type, uintptr_t base, > + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], > + const uint32_t flags) > +{ > + uint16_t port[4], queue[4]; > + struct cn10k_eth_txq *txq; > + uint16_t i, j; > + uintptr_t pa; > + > + for (i = 0; i < nb_mbufs; i += 4) { > + port[0] = mbufs[i]->port; > + port[1] = mbufs[i + 1]->port; > + port[2] = mbufs[i + 2]->port; > + port[3] = mbufs[i + 3]->port; > + > + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); > + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); > + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); > + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); > + > + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) || > + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { > + > + for (j = 0; j < 4; j++) { > + struct rte_mbuf *m = mbufs[i + j]; > + > + txq = (struct cn10k_eth_txq *) > + txq_data[port[j]][queue[j]]; > + cn10k_nix_tx_skeleton(txq, cmd, flags); > + /* Perform header writes before barrier > + * for TSO > + */ > + if (flags & NIX_TX_OFFLOAD_TSO_F) > + cn10k_nix_xmit_prepare_tso(m, flags); > + > + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, > + txq->lso_tun_fmt); > + if (flags & NIX_TX_MULTI_SEG_F) { > + const uint16_t segdw = > + cn10k_nix_prepare_mseg( > + m, (uint64_t *)lmt_addr, > + flags); > + pa = txq->io_addr | ((segdw - 1) << 4); > + } else { > + pa = txq->io_addr | > + (cn10k_nix_tx_ext_subs(flags) + 1) > + << 4; > + } > + if (!sched_type) > + roc_sso_hws_head_wait(base + > + SSOW_LF_GWS_TAG); > + > + roc_lmt_submit_steorl(lmt_id, pa); > + } > + } else { > + txq = (struct cn10k_eth_txq *) > + txq_data[port[0]][queue[0]]; > + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base > + + SSOW_LF_GWS_TAG, > + flags | NIX_TX_VWQE_F); > + } > + } > +} > + > static __rte_always_inline uint16_t > cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, > uint64_t *cmd, > const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], > const uint32_t flags) > { > - const struct cn10k_eth_txq *txq; > - struct rte_mbuf *m = ev->mbuf; > - uint16_t ref_cnt = m->refcnt; > + struct cn10k_eth_txq *txq; > + struct rte_mbuf *m; > uintptr_t lmt_addr; > + uint16_t ref_cnt; > uint16_t lmt_id; > uintptr_t pa; > > lmt_addr = ws->lmt_base; > ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); > + > + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { > + struct rte_mbuf **mbufs = ev->vec->mbufs; > + uint64_t meta = *(uint64_t *)ev->vec; > + > + if (meta & BIT(31)) { > + txq = (struct cn10k_eth_txq *) > + txq_data[meta >> 32][meta >> 48]; > + > + cn10k_nix_xmit_pkts_vector( > + txq, mbufs, meta & 0xFFFF, cmd, > + ws->tx_base + SSOW_LF_GWS_TAG, > + flags | NIX_TX_VWQE_F); > + } else { > + cn10k_sso_vwqe_split_tx( > + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, > + ev->sched_type, ws->tx_base, txq_data, flags); > + } > + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); > + return (meta & 0xFFFF); > + } > + > + m = ev->mbuf; > + ref_cnt = m->refcnt; > txq = cn10k_sso_hws_xtract_meta(m, txq_data); > cn10k_nix_tx_skeleton(txq, cmd, flags); > /* Perform header writes before barrier for TSO */ > @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, > pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; > } > if (!ev->sched_type) > - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); > + 
roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); > > roc_lmt_submit_steorl(lmt_id, pa); > > @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, > > cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, > ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); > - > return 1; > } > > diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h > index 3f9751211a..cc1e141957 100644 > --- a/drivers/event/cnxk/cn9k_worker.h > +++ b/drivers/event/cnxk/cn9k_worker.h > @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, > const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); > if (!CNXK_TT_FROM_EVENT(ev->event)) { > cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); > - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > cn9k_sso_txq_fc_wait(txq); > if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) > cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, > @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, > } else { > if (!CNXK_TT_FROM_EVENT(ev->event)) { > cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); > - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > cn9k_sso_txq_fc_wait(txq); > if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) > cn9k_nix_xmit_one(cmd, txq->lmt_addr, > diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h > index 7891b749df..9f9ceab8a1 100644 > --- a/drivers/event/cnxk/cnxk_worker.h > +++ b/drivers/event/cnxk/cnxk_worker.h > @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) > #endif > } > > -static __rte_always_inline void > -cnxk_sso_hws_head_wait(uintptr_t tag_op) > -{ > -#ifdef RTE_ARCH_ARM64 > - uint64_t tag; > - > - asm volatile(" ldr %[tag], [%[tag_op]] \n" > - " tbnz %[tag], 35, done%= \n" > - " sevl \n" > - "rty%=: wfe \n" > - " ldr %[tag], [%[tag_op]] \n" > - " tbz %[tag], 35, rty%= \n" > - "done%=: \n" > - : [tag] "=&r"(tag) > - : [tag_op] "r"(tag_op)); > -#else > - /* Wait for the HEAD to be set */ > - while (!(plt_read64(tag_op) & BIT_ULL(35))) > - ; > -#endif > -} > - > #endif > diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c > index 1f30bab59a..0e1276c60b 100644 > --- a/drivers/net/cnxk/cn10k_tx.c > +++ b/drivers/net/cnxk/cn10k_tx.c > @@ -16,7 +16,7 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ > - flags); \ > + 0, flags); \ > } > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h > index eb148b8e77..f75cae07ae 100644 > --- a/drivers/net/cnxk/cn10k_tx.h > +++ b/drivers/net/cnxk/cn10k_tx.h > @@ -18,6 +18,7 @@ > * Defining it from backwards to denote its been > * not used as offload flags to pick function > */ > +#define NIX_TX_VWQE_F BIT(14) > #define NIX_TX_MULTI_SEG_F BIT(15) > > #define NIX_TX_NEED_SEND_HDR_W1 \ > @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) > > static __rte_always_inline uint16_t > cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, > - uint64_t *cmd, const uint16_t flags) > + uint64_t *cmd, uintptr_t base, const uint16_t flags) > { > struct cn10k_eth_txq *txq = tx_queue; > const rte_iova_t io_addr = txq->io_addr; > @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, > uint64_t lso_tun_fmt; > 
uint64_t data; > > - NIX_XMIT_FC_OR_RETURN(txq, pkts); > + if (!(flags & NIX_TX_VWQE_F)) { > + NIX_XMIT_FC_OR_RETURN(txq, pkts); > + /* Reduce the cached count */ > + txq->fc_cache_pkts -= pkts; > + } > > /* Get cmd skeleton */ > cn10k_nix_tx_skeleton(txq, cmd, flags); > > - /* Reduce the cached count */ > - txq->fc_cache_pkts -= pkts; > - > if (flags & NIX_TX_OFFLOAD_TSO_F) > lso_tun_fmt = txq->lso_tun_fmt; > > @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, > lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); > } > > + if (flags & NIX_TX_VWQE_F) > + roc_sso_hws_head_wait(base); > + > /* Trigger LMTST */ > if (burst > 16) { > data = cn10k_nix_tx_steor_data(flags); > @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, > > static __rte_always_inline uint16_t > cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, > - uint16_t pkts, uint64_t *cmd, const uint16_t flags) > + uint16_t pkts, uint64_t *cmd, uintptr_t base, > + const uint16_t flags) > { > struct cn10k_eth_txq *txq = tx_queue; > uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; > @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, > shft += 3; > } > > + if (flags & NIX_TX_VWQE_F) > + roc_sso_hws_head_wait(base); > + > data0 = (uint64_t)data128; > data1 = (uint64_t)(data128 >> 64); > /* Make data0 similar to data1 */ > @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, > > static __rte_always_inline uint16_t > cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > - uint16_t pkts, uint64_t *cmd, const uint16_t flags) > + uint16_t pkts, uint64_t *cmd, uintptr_t base, > + const uint16_t flags) > { > uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; > uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; > @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > uint64_t data[2]; > } wd; > > - NIX_XMIT_FC_OR_RETURN(txq, pkts); > - > - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); > - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > + if (!(flags & NIX_TX_VWQE_F)) { > + NIX_XMIT_FC_OR_RETURN(txq, pkts); > + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); > + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > + /* Reduce the cached count */ > + txq->fc_cache_pkts -= pkts; > + } else { > + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); > + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > + } > > - /* Reduce the cached count */ > - txq->fc_cache_pkts -= pkts; > /* Perform header writes before barrier for TSO */ > if (flags & NIX_TX_OFFLOAD_TSO_F) { > for (i = 0; i < pkts; i++) > @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > if (flags & NIX_TX_MULTI_SEG_F) > wd.data[0] >>= 16; > > + if (flags & NIX_TX_VWQE_F) > + roc_sso_hws_head_wait(base); > + > /* Trigger LMTST */ > if (lnum > 16) { > if (!(flags & NIX_TX_MULTI_SEG_F)) > @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > if (unlikely(scalar)) { > if (flags & NIX_TX_MULTI_SEG_F) > pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, > - scalar, cmd, flags); > + scalar, cmd, base, > + flags); > else > pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, > - cmd, flags); > + cmd, base, flags); > } > > return pkts; > @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > #else > static 
__rte_always_inline uint16_t > cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > - uint16_t pkts, uint64_t *cmd, const uint16_t flags) > + uint16_t pkts, uint64_t *cmd, uintptr_t base, > + const uint16_t flags) > { > RTE_SET_USED(tx_queue); > RTE_SET_USED(tx_pkts); > RTE_SET_USED(pkts); > RTE_SET_USED(cmd); > RTE_SET_USED(flags); > + RTE_SET_USED(base); > return 0; > } > #endif > diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c > index 33f6754722..4ea4c8a4e5 100644 > --- a/drivers/net/cnxk/cn10k_tx_mseg.c > +++ b/drivers/net/cnxk/cn10k_tx_mseg.c > @@ -18,7 +18,8 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ > - (flags) | NIX_TX_MULTI_SEG_F); \ > + 0, (flags) \ > + | NIX_TX_MULTI_SEG_F); \ > } > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c > index 34e3737501..a0350496ab 100644 > --- a/drivers/net/cnxk/cn10k_tx_vec.c > +++ b/drivers/net/cnxk/cn10k_tx_vec.c > @@ -18,7 +18,7 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ > - (flags)); \ > + 0, (flags)); \ > } > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > index 1fad81dbad..7f98f79b97 100644 > --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c > +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > @@ -16,7 +16,7 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_vector( \ > - tx_queue, tx_pkts, pkts, cmd, \ > + tx_queue, tx_pkts, pkts, cmd, 0, \ > (flags) | NIX_TX_MULTI_SEG_F); \ > } > > -- > 2.17.1 >
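The "Unexpected indentation" failure reported above is what the v9 respin below addresses ("Fix doc compilation"). For reference, the nested release-note list needs a blank line before it and a consistent indent under its parent bullet, along these lines:

    * **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**

      * Added Rx/Tx adapter support for event/cnxk when the ethernet
        device requested is net/cnxk.
      * Add support for event vectorization for Rx/Tx adapter.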
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter. Resize the cn10k workslot fastpath structure to fit in a 64B cache line. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v9 Changes: - Fix doc compilation. - Remove rte prefix used in base code. v8 Changes: - Fix incorrect cq_w1 offset. - Move doc changes to 1st patch. v7 Changes: - Set correct limits for SQB aura. v6 Changes: - More code cleanup. - Fix incorrect SQB configuration and missing fc check. v5 Changes: - Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset. v4 Changes: - Split patches for easier merge. v3 Changes: - Spell check. doc/guides/eventdevs/cnxk.rst | 33 +++++ doc/guides/rel_notes/release_21_08.rst | 6 + drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++ drivers/common/cnxk/roc_nix_priv.h | 3 +- drivers/common/cnxk/version.map | 1 + drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++--- drivers/event/cnxk/cn10k_worker.c | 7 +- drivers/event/cnxk/cn10k_worker.h | 32 +++-- drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++ drivers/event/cnxk/cn9k_worker.h | 4 + drivers/event/cnxk/cnxk_eventdev.c | 2 + drivers/event/cnxk/cnxk_eventdev.h | 43 ++++-- drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++ drivers/event/cnxk/meson.build | 9 +- 15 files changed, 528 insertions(+), 47 deletions(-) diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst index 36da3800cc..53560d3830 100644 --- a/doc/guides/eventdevs/cnxk.rst +++ b/doc/guides/eventdevs/cnxk.rst @@ -39,6 +39,15 @@ Features of the OCTEON cnxk SSO PMD are: time granularity of 2.5us on CN9K and 1us on CN10K. - Up to 256 TIM rings a.k.a event timer adapters. - Up to 8 rings traversed in parallel. +- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue configuration. +- HW managed event vectorization on CN10K for packets enqueued from ethdev to + eventdev, configurable per Rx queue in the Rx adapter. +- Event vector transmission via Tx adapter. Prerequisites and Compilation procedure --------------------------------------- @@ -93,6 +102,15 @@ Runtime Config Options -a 0002:0e:00.0,qos=[1-50-50-50] +- ``Force Rx Back pressure`` + + Force Rx back pressure when the same mempool is used across ethernet + devices connected to the event device. + + For example:: + + -a 0002:0e:00.0,force_rx_bp=1 + - ``TIM disable NPA`` By default chunks are allocated from NPA then TIM can automatically free @@ -160,3 +178,18 @@ Debugging Options +---+------------+-------------------------------------------------------+ | 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' | +---+------------+-------------------------------------------------------+ + +Limitations +----------- + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +Using the same mempool for all the ethernet device ports connected to the +event device causes back pressure to be asserted only on the first +ethernet device. +Back pressure is automatically disabled when the same mempool is used for all +the ethernet devices connected to the event device; to override this, +applications can use the `force_rx_bp=1` device argument. +Using a unique mempool per ethernet device is recommended when the devices +are connected to the event device. 
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index 6a902ef9ac..d40533ab44 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -117,6 +117,12 @@ New Features The experimental PMD power management API now supports managing multiple Ethernet Rx queues per lcore. +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.** + + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested + is net/cnxk. + * Add support for event vectorization for Rx/Tx adapter. + Removed Items ------------- diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index bb69027956..76613fe84e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix); +void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, + uint8_t ena, uint8_t force); + /* NPC */ int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index 47be8aa3f8..f17eba4169 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode) exit: return rc; } + +void +rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, + uint8_t force) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aq_enq_req *req; + struct npa_aq_enq_rsp *rsp; + struct mbox *mbox; + uint32_t limit; + int rc; + + if (roc_nix_is_sdp(roc_nix)) + return; + + if (!lf) + return; + mbox = lf->mbox; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_READ; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return; + + limit = rsp->aura.limit; + /* BP is already enabled. */ + if (rsp->aura.bp_ena) { + /* If BP ids don't match disable BP. */ + if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) { + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + req->aura.bp_ena = 0; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); + } + return; + } + + /* BP was previously enabled but now disabled skip. */ + if (rsp->aura.bp) + return; + + req = mbox_alloc_msg_npa_aq_enq(mbox); + if (req == NULL) + return; + + req->aura_id = roc_npa_aura_handle_to_aura(pool_id); + req->ctype = NPA_AQ_CTYPE_AURA; + req->op = NPA_AQ_INSTOP_WRITE; + + if (ena) { + req->aura.nix0_bpid = nix->bpid[0]; + req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid); + req->aura.bp = NIX_RQ_AURA_THRESH( + limit > 128 ? 
256 : limit); /* 95% of size*/ + req->aura_mask.bp = ~(req->aura_mask.bp); + } + + req->aura.bp_ena = !!ena; + req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena); + + mbox_process(mbox); +} diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index d9c32df442..9dc0c88a6f 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -16,7 +16,8 @@ #define NIX_SQB_LOWER_THRESH ((uint16_t)70) /* Apply BP/DROP when CQ is 95% full */ -#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100) +#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100) /* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */ #define CQ_CQE_THRESH_DEFAULT 0x1ULL diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index e3af48c02e..8ea3e9f439 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -83,6 +83,7 @@ INTERNAL { roc_nix_fc_config_set; roc_nix_fc_mode_set; roc_nix_fc_mode_get; + rox_nix_fc_npa_bp_cfg; roc_nix_get_base_chan; roc_nix_get_pf; roc_nix_get_pf_func; diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index bf4052c76c..2060c8fe84 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -6,18 +6,6 @@ #include "cnxk_eventdev.h" #include "cnxk_worker.h" -static void -cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base) -{ - ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0; - ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0; - ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1; - ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; - ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG; - ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; - ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; -} - static uint32_t cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev) { @@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); - cn10k_init_hws_ops(ws, ws->base); ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cq_ds_cnt &= 0x3FFF3FFF0000; while (aq_cnt || cq_ds_cnt || ds_cnt) { - plt_write64(req, ws->getwrk_op); + plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); cn10k_sso_hws_get_work_empty(ws, &ev); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, - ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush( + ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); @@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws) if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) { - plt_write64(BIT_ULL(16) | 1, ws->getwrk_op); + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ @@ -407,6 +397,80 @@ cn10k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn10k)); } 
+static int +cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } +} + +static int +cn10k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn10k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .port_unlink = cn10k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN10K_SSO_GW_MODE "=<int>" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index e2aa534c64..5dbae275ba 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op); + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); break; default: return 0; @@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - 
cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return 1; } @@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) if (ws->swtag_req) { ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_wqe_op); + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); return ret; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 2f093a8dd5..c7250bf9e7 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn10k_ethdev.h" +#include "cn10k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t @@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op)); + const uint8_t cur_tt = + CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); /* CNXK model * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED @@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev) if (new_tt == SSO_TT_UNTAGGED) { if (cur_tt != SSO_TT_UNTAGGED) - cnxk_sso_hws_swtag_untag(ws->swtag_untag_op); + cnxk_sso_hws_swtag_untag(ws->base + + SSOW_LF_GWS_OP_SWTAG_UNTAG); } else { - cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op); + cnxk_sso_hws_swtag_norm(tag, new_tt, + ws->base + SSOW_LF_GWS_OP_SWTAG_NORM); } ws->swtag_req = 1; } @@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev, const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - plt_write64(ev->u64, ws->updt_wqe_op); - cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op); + plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + cnxk_sso_hws_swtag_desched(tag, new_tt, grp, + ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED); } static __rte_always_inline void @@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp) + if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) cn10k_sso_hws_fwd_swtag(ws, ev); else /* @@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" : [wdata] "+r"(gw.get_work) - : [gw_loc] "r"(ws->getwrk_op) + : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else - plt_write64(gw.u64[0], ws->getwrk_op); + plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | @@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) - : [tag_loc] "r"(ws->tag_wqe_op) + : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else do { - roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op); + roc_load_pair(gw.u64[0], gw.u64[1], + ws->base + 
SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); #endif diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 0684417eab..072800c243 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -481,6 +481,88 @@ cn9k_sso_selftest(void) return cnxk_sso_selftest(RTE_STR(event_cn9k)); } +static int +cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + + return 0; +} + +static void +cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem, + void *tstmp_info) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + dws->lookup_mem = lookup_mem; + dws->tstamp = tstmp_info; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + ws->lookup_mem = lookup_mem; + ws->tstamp = tstmp_info; + } + } +} + +static int +cn9k_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cn9k_eth_rxq *rxq; + void *lookup_mem; + void *tstmp_info; + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, + queue_conf); + if (rc) + return -EINVAL; + + rxq = eth_dev->data->rx_queues[0]; + lookup_mem = rxq->lookup_mem; + tstmp_info = rxq->tstamp; + cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info); + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (rc) + return -EINVAL; + + return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .port_unlink = cn9k_sso_port_unlink, .timeout_ticks = cnxk_sso_timeout_ticks, + .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, + .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, @@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map); RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>" CNXK_SSO_GGRP_QOS "=<string>" + CNXK_SSO_FORCE_BP "=1" CN9K_SSO_SINGLE_WS "=1" CNXK_TIM_DISABLE_NPA "=1" CNXK_TIM_CHNK_SLOTS "=<int>" diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 38fca08fb6..f5a4401465 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ 
b/drivers/event/cnxk/cn9k_worker.h @@ -5,9 +5,13 @@ #ifndef __CN9K_WORKER_H__ #define __CN9K_WORKER_H__ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" +#include "cn9k_ethdev.h" +#include "cn9k_rx.h" + /* SSO Operations */ static __rte_always_inline uint8_t diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c index 7189ee3a79..cfd7fb971c 100644 --- a/drivers/event/cnxk/cnxk_eventdev.c +++ b/drivers/event/cnxk/cnxk_eventdev.c @@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs) &dev->xae_cnt); rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, dev); + rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value, + &dev->force_ena_bp); rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value, &single_ws); rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 668e51d62a..b65d725f55 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -6,6 +6,8 @@ #define __CNXK_EVENTDEV_H__ #include <rte_devargs.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -18,6 +20,7 @@ #define CNXK_SSO_XAE_CNT "xae_cnt" #define CNXK_SSO_GGRP_QOS "qos" +#define CNXK_SSO_FORCE_BP "force_rx_bp" #define CN9K_SSO_SINGLE_WS "single_ws" #define CN10K_SSO_GW_MODE "gw_mode" @@ -81,7 +84,10 @@ struct cnxk_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint64_t rx_offloads; uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -89,25 +95,18 @@ struct cnxk_sso_evdev { uint32_t xae_cnt; uint8_t qos_queue_cnt; struct cnxk_sso_qos *qos_parse_data; + uint8_t force_ena_bp; /* CN9K */ uint8_t dual_ws; /* CN10K */ uint8_t gw_mode; } __rte_cache_aligned; -/* CN10K HWS ops */ -#define CN10K_SSO_HWS_OPS \ - uintptr_t swtag_desched_op; \ - uintptr_t swtag_flush_op; \ - uintptr_t swtag_untag_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t updt_wqe_op; \ - uintptr_t tag_wqe_op; \ - uintptr_t getwrk_op - struct cn10k_sso_hws { - /* Get Work Fastpath data */ - CN10K_SSO_HWS_OPS; + uint64_t base; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint32_t gw_wdata; uint8_t swtag_req; uint8_t hws_id; @@ -115,7 +114,6 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; uintptr_t lmt_base; } __rte_cache_aligned; @@ -132,6 +130,9 @@ struct cn10k_sso_hws { struct cn9k_sso_hws { /* Get Work Fastpath data */ CN9K_SSO_HWS_OPS; + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t hws_id; /* Add Work Fastpath data */ @@ -148,6 +149,9 @@ struct cn9k_sso_hws_state { struct cn9k_sso_hws_dual { /* Get Work Fastpath data */ struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct cnxk_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ uint8_t hws_id; @@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev, /* CN9K */ void cn9k_sso_set_rsrc(void *arg); +/* Common adapter ops */ +int cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct 
rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); + #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 89a1d82c14..3b7ecb375a 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -2,6 +2,7 @@ * Copyright(C) 2021 Marvell. */ +#include "cnxk_ethdev.h" #include "cnxk_eventdev.h" void @@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, int i; switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: { + struct cnxk_eth_rxq_sp *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc( + dev->rx_adptr_pools, + sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->qconf.mp->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->qconf.mp; + + dev->adptr_xae_cnt += rxq->qconf.mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; @@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, break; } } + +static int +cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, + uint16_t port_id, const struct rte_event *ev, + uint8_t custom_flowid) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 1; + rq->tt = ev->sched_type; + rq->hwgrp = ev->queue_id; + rq->flow_tag_width = 20; + rq->wqe_skip = 1; + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4)) + << 24; + + if (custom_flowid) { + rq->flow_tag_width = 0; + rq->tag_mask |= ev->flow_id; + } + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + rq->sso_ena = 0; + rq->flow_tag_width = 32; + rq->tag_mask = 0; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +int +cnxk_sso_rx_adapter_queue_add( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, + i, queue_conf); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc |= cnxk_sso_rxq_enable( + cnxk_eth_dev, 
(uint16_t)rx_queue_id, port, + &queue_conf->ev, + !!(queue_conf->rx_queue_flags & + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, true, + dev->force_ena_bp); + } + + if (rc < 0) { + plt_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags; + + return 0; +} + +int +cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cnxk_eth_rxq_sp *rxq_sp; + int i, rc = 0; + + RTE_SET_USED(event_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i); + } else { + rxq_sp = cnxk_eth_rxq_to_sp( + eth_dev->data->rx_queues[rx_queue_id]); + rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id); + rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix, + rxq_sp->qconf.mp->pool_id, false, + dev->force_ena_bp); + } + + if (rc < 0) + plt_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index 87bb9f76a9..eda562f5b5 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -21,4 +21,11 @@ sources = files( 'cnxk_tim_worker.c', ) -deps += ['bus_pci', 'common_cnxk'] +extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing'] +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_cnxk', 'net_cnxk'] -- 2.17.1
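From the application side, the adapter added above is driven entirely through the generic Rx adapter API; the sketch below shows the minimal sequence (evdev_id, eth_port_id, adptr_id and evport_conf are placeholders the application already owns, and error checks are omitted). Since the PMD reports RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT, hardware injects the packets and no service core is consumed:

    #include <string.h>
    #include <rte_event_eth_rx_adapter.h>

    struct rte_event_eth_rx_adapter_queue_conf qconf;
    uint32_t caps;

    memset(&qconf, 0, sizeof(qconf));
    qconf.ev.queue_id = 0;                       /* SSO hardware group */
    qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC; /* becomes rq->tt */
    qconf.ev.event_type = RTE_EVENT_TYPE_ETHDEV;

    rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port_id, &caps);
    rte_event_eth_rx_adapter_create(adptr_id, evdev_id, &evport_conf);
    /* rx_queue_id of -1 binds every Rx queue of the port, matching the
     * rx_queue_id < 0 loop in cnxk_sso_rx_adapter_queue_add() above.
     */
    rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port_id, -1, &qconf);
    rte_event_eth_rx_adapter_start(adptr_id);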
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Rx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++- drivers/event/cnxk/cn10k_worker.c | 54 ---- drivers/event/cnxk/cn10k_worker.h | 97 +++++- drivers/event/cnxk/cn10k_worker_deq.c | 44 +++ drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++- drivers/event/cnxk/cn9k_worker.c | 117 ------- drivers/event/cnxk/cn9k_worker.h | 174 ++++++++-- drivers/event/cnxk/cn9k_worker_deq.c | 44 +++ drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++ drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++ drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++ .../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++ drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++ drivers/event/cnxk/cnxk_eventdev.h | 1 + drivers/event/cnxk/meson.build | 9 + 17 files changed, 1124 insertions(+), 231 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2060c8fe84..ba7d95fff7 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -237,17 +237,141 @@ static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + 
sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn10k_sso_hws_deq; - event_dev->dequeue_burst = cn10k_sso_hws_deq_burst; - if (dev->is_timeout_deq) { - event_dev->dequeue = cn10k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_tmo_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_tmo_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + 
NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } } diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 5dbae275ba..c71aa37327 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return 1; - } - - return cn10k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn10k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); - return ret; - } - - ret = cn10k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn10k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c7250bf9e7..b724083caa 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, cn10k_sso_hws_fwd_group(ws, ev, grp); } +static __rte_always_inline void +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t -cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) +cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, void *lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; gw.get_work = ws->gw_wdata; #if defined(RTE_ARCH_ARM64) && !defined(__clang__) asm volatile( PLT_CPU_FEATURE_PREAMBLE "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n" - : [wdata] "+r"(gw.get_work) + "sub %[mbuf], %H[wdata], #0x80 \n" + : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf) : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0) : "memory"); #else @@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) " ldp %[tag], %[wqp], [%[tag_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0) : "memory"); #else @@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev) roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0); } while (gw.u64[0] & BIT_ULL(63)); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot 
cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c new file mode 100644 index 0000000000..36ec454ccc --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return 1; \ + } \ + \ + return cn10k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c new file mode 100644 index 0000000000..29ecc551cf --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c new file mode 100644 index 0000000000..c8524a27bd --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \ + return ret; \ + } \ + \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn10k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 072800c243..e386cb784a 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -252,17 +252,202 @@ static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = 
cn9k_sso_hws_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + /* Dual WS modes */ + const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; + + const event_dequeue_burst_t + sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = { +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name, + NIX_RX_FASTPATH_MODES +#undef R + }; event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst; - - event_dev->dequeue = cn9k_sso_hws_deq; - 
event_dev->dequeue_burst = cn9k_sso_hws_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_tmo_deq; - event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } } if (dev->dual_ws) { @@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) event_dev->enqueue_forward_burst = cn9k_sso_hws_dual_enq_fwd_burst; - event_dev->dequeue = cn9k_sso_hws_dual_deq; - event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst; - if (dev->deq_tmo_ns) { - event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq; - 
event_dev->dequeue_burst = - cn9k_sso_hws_dual_tmo_deq_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = sso_hws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = sso_hws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = sso_hws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = sso_hws_dual_deq_tmo + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + sso_hws_dual_deq_tmo_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } } } + + rte_mb(); } static void * diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index 9ceacc98dd..538bc4b0b3 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } -uint16_t __rte_hot -cn9k_sso_hws_deq(void *port, 
struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - - RTE_SET_USED(timeout_ticks); - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return 1; - } - - return cn9k_sso_hws_get_work(ws, ev); -} - -uint16_t __rte_hot -cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events, - uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws *ws = port; - uint16_t ret = 1; - uint64_t iter; - - if (ws->swtag_req) { - ws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(ws->tag_op); - return ret; - } - - ret = cn9k_sso_hws_get_work(ws, ev); - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) - ret = cn9k_sso_hws_get_work(ws, ev); - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks); -} - /* Dual ws ops. */ uint16_t __rte_hot @@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], return 1; } - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t gw; - - RTE_SET_USED(timeout_ticks); - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return 1; - } - - gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - return gw; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks); -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks) -{ - struct cn9k_sso_hws_dual *dws = port; - uint16_t ret = 1; - uint64_t iter; - - if (dws->swtag_req) { - dws->swtag_req = 0; - cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op); - return ret; - } - - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { - ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], - &dws->ws_state[!dws->vws], ev); - dws->vws = !dws->vws; - } - - return ret; -} - -uint16_t __rte_hot -cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, uint64_t timeout_ticks) -{ - RTE_SET_USED(nb_events); - - return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks); -} diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index f5a4401465..c01c00e1da 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, } } +static __rte_always_inline void +cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void *const lookup_mem) +{ + const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + + cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + mbuf_init | ((uint64_t)port_id) << 48, flags); +} + static __rte_always_inline uint16_t cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, struct cn9k_sso_hws_state *ws_pair, - struct rte_event *ev) + struct rte_event *ev, const uint32_t flags, + const void *const lookup_mem, + struct cnxk_timesync_info *const tstamp) { const uint64_t set_gw = BIT_ULL(16) | 1; union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE "rty%=: \n" @@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, " tbnz %[tag], 63, rty%= \n" "done%=: str %[gw], [%[pong]] \n" " dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op), [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op)); #else @@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); plt_write64(set_gw, ws_pair->getwrk_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws, } static __rte_always_inline uint16_t -cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) +cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev, + const uint32_t flags, const void *const lookup_mem) { union { __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t tstamp_ptr; + uint64_t mbuf; plt_write64(BIT_ULL(16) | /* wait for work. */ 1, /* Use Mask set 0. 
*/ ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE " ldr %[tag], [%[tag_loc]] \n" @@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, flags, + lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *) + gw.u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, + ws->tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) __uint128_t get_work; uint64_t u64[2]; } gw; + uint64_t mbuf; #ifdef RTE_ARCH_ARM64 asm volatile(PLT_CPU_FEATURE_PREAMBLE @@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) " ldr %[wqp], [%[wqp_loc]] \n" " tbnz %[tag], 63, rty%= \n" "done%=: dmb ld \n" - : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]) + " sub %[mbuf], %[wqp], #0x80 \n" + : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]), + [mbuf] "=&r"(mbuf) : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op)); #else gw.u64[0] = plt_read64(ws->tag_op); @@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev) gw.u64[0] = plt_read64(ws->tag_op); gw.u64[1] = plt_read64(ws->wqp_op); + mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); #endif gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff); + if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) { + if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + + gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]); + cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port, + gw.u64[0] & 0xFFFFF, 0, NULL); + gw.u64[1] = mbuf; + } + } + ev->event = gw.u64[0]; ev->u64 = gw.u64[1]; @@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events); -uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t 
__rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); - -uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev, - uint64_t timeout_ticks); -uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port, - struct rte_event ev[], - uint16_t nb_events, - uint64_t timeout_ticks); +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks); \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks); + +NIX_RX_FASTPATH_MODES +#undef R #endif diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c new file mode 100644 index 0000000000..51ccaf4ec4 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return 1; \ + } \ + \ + return cn9k_sso_hws_get_work( \ + ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c new file mode 100644 index 0000000000..4e2801459b --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c new file mode 100644 index 0000000000..9713d1ef00 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait(ws->tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = cn9k_sso_hws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c new file mode 100644 index 0000000000..709fa2d9ef --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return 1; \ + } \ + \ + gw = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + return gw; \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c new file mode 100644 index 0000000000..d50e1cf83f --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c new file mode 100644 index 0000000000..a0508fdf0d --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" +#include "cnxk_eventdev.h" +#include "cnxk_worker.h" + +#define R(name, f5, f4, f3, f2, f1, f0, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \ + timeout_ticks); \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \ + void *port, struct rte_event *ev, uint64_t timeout_ticks) \ + { \ + struct cn9k_sso_hws_dual *dws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (dws->swtag_req) { \ + dws->swtag_req = 0; \ + cnxk_sso_hws_swtag_wait( \ + dws->ws_state[!dws->vws].tag_op); \ + return ret; \ + } \ + \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \ + ev, flags, dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \ + ret = cn9k_sso_hws_dual_get_work( \ + &dws->ws_state[dws->vws], \ + &dws->ws_state[!dws->vws], ev, flags, \ + dws->lookup_mem, dws->tstamp); \ + dws->vws = !dws->vws; \ + } \ + \ + return ret; \ + } \ + \ + uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events, \ + uint64_t timeout_ticks) \ + { \ + RTE_SET_USED(nb_events); \ + \ + return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \ + timeout_ticks); \ + } + +NIX_RX_FASTPATH_MODES +#undef R diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index b65d725f55..9d5d2d0339 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -33,6 +33,7 @@ #define CNXK_SSO_MZ_NAME "cnxk_evdev_mz" #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) +#define CNXK_SSO_WQE_SG_PTR (9) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index eda562f5b5..c5c1c0ee8e 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -11,8 +11,17 @@ endif sources = files( 'cn9k_eventdev.c', 'cn9k_worker.c', + 'cn9k_worker_deq.c', + 'cn9k_worker_deq_burst.c', + 'cn9k_worker_deq_tmo.c', + 'cn9k_worker_dual_deq.c', + 'cn9k_worker_dual_deq_burst.c', + 'cn9k_worker_dual_deq_tmo.c', 'cn10k_eventdev.c', 'cn10k_worker.c', + 'cn10k_worker_deq.c', + 'cn10k_worker_deq_burst.c', + 'cn10k_worker_deq_tmo.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
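
[Editor's illustration, not part of the series: the dequeue fastpath added above is selected entirely inside the PMD — cn9k_sso_fp_fns_set()/cn10k_sso_fp_fns_set() index the R()-generated function tables with the Rx offload flags — so an application keeps using the generic eventdev and Rx adapter APIs. As a minimal sketch of that application side, under the assumption of a cnxk ethdev paired with a cnxk eventdev, and with all IDs (ADAPTER_ID, EVDEV_ID, ETHDEV_ID, EV_QUEUE_ID, event port 0) being hypothetical placeholders:]

#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

/* Hypothetical identifiers for illustration only. */
#define ADAPTER_ID  0
#define EVDEV_ID    0
#define ETHDEV_ID   0
#define EV_QUEUE_ID 0

static int
app_setup_rx_adapter(void)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(EVDEV_ID, ETHDEV_ID, &caps);
	if (rc)
		return rc;
	/* With the cnxk pairing, caps should include
	 * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT, i.e. packets are
	 * injected into the SSO by hardware, no service core needed.
	 */

	rc = rte_event_port_default_conf_get(EVDEV_ID, 0, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_create(ADAPTER_ID, EVDEV_ID, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = EV_QUEUE_ID;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;

	/* rx_queue_id of -1 maps every Rx queue of the port to the same
	 * event queue (the N:1 mapping advertised for this PMD).
	 */
	rc = rte_event_eth_rx_adapter_queue_add(ADAPTER_ID, ETHDEV_ID, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(ADAPTER_ID);
}

[After this setup, workers simply call rte_event_dequeue_burst() and receive RTE_EVENT_TYPE_ETHDEV events whose u64 already points at a fully formed mbuf; the offload-specialized deq function chosen by the PMD is transparent to the application.]
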
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com> --- drivers/common/cnxk/roc_nix.h | 1 + drivers/common/cnxk/roc_nix_queue.c | 8 +- drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++ drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++ drivers/event/cnxk/cnxk_eventdev.h | 22 +++- drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++ 6 files changed, 353 insertions(+), 5 deletions(-) diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 76613fe84e..822c1900e2 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -200,6 +200,7 @@ struct roc_nix_sq { uint64_t aura_handle; int16_t nb_sqb_bufs_adj; uint16_t nb_sqb_bufs; + uint16_t aura_sqb_bufs; plt_iova_t io_addr; void *lmt_addr; void *sqe_mem; diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c index 0604e7a18e..7e2f86eca7 100644 --- a/drivers/common/cnxk/roc_nix_queue.c +++ b/drivers/common/cnxk/roc_nix_queue.c @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) aura.fc_ena = 1; aura.fc_addr = (uint64_t)sq->fc; aura.fc_hyst_bits = 0; /* Store count on all updates */ - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura, + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura, &pool); if (rc) goto fail; - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz); + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz); if (sq->sqe_mem == NULL) { rc = NIX_ERR_NO_MEM; goto nomem; @@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) /* Fill the initial buffers */ iova = (uint64_t)sq->sqe_mem; - for (count = 0; count < nb_sqb_bufs; count++) { + for (count = 0; count < NIX_MAX_SQB; count++) { roc_npa_aura_op_free(sq->aura_handle, 0, iova); iova += blk_sz; } roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova); + roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs); + sq->aura_sqb_bufs = NIX_MAX_SQB; return rc; nomem: diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index ba7d95fff7..8a9b04a3db 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id) /* First cache line is reserved for cookie */ ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE); ws->base = roc_sso_hws_base_get(&dev->sso, port_id); + ws->tx_base = ws->base; ws->hws_id = port_id; ws->swtag_req = 0; ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev); @@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + struct cn10k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn10k_sso_hws) + + (sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, 
sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + + return 0; +} + static void cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, cn10k_sso_hws_flush_events); if (rc < 0) @@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + rc = cn10k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + return cn10k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn10k_sso_dev_ops = { .dev_infos_get = cn10k_sso_info_get, .dev_configure = cn10k_sso_dev_configure, @@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index e386cb784a..21f80323d9 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp) return roc_sso_rsrc_init(&dev->sso, hws, hwgrp); } +static int +cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + int i; + + if (dev->tx_adptr_data == NULL) + return 0; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct cn9k_sso_hws_dual *dws = + event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(dws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + dws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&dws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 
1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = dws; + } else { + struct cn9k_sso_hws *ws = event_dev->data->ports[i]; + void *ws_cookie; + + ws_cookie = cnxk_sso_hws_get_cookie(ws); + ws_cookie = rte_realloc_socket( + ws_cookie, + sizeof(struct cnxk_sso_hws_cookie) + + sizeof(struct cn9k_sso_hws_dual) + + (sizeof(uint64_t) * + (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (ws_cookie == NULL) + return -ENOMEM; + ws = RTE_PTR_ADD(ws_cookie, + sizeof(struct cnxk_sso_hws_cookie)); + memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, + sizeof(uint64_t) * (dev->max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT); + event_dev->data->ports[i] = ws; + } + } + rte_mb(); + + return 0; +} + static void cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) { @@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev) { int rc; + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset, cn9k_sso_hws_flush_events); if (rc < 0) @@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static void +cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id, + bool ena) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cn9k_eth_txq *txq; + struct roc_nix_sq *sq; + int i; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cn9k_sso_txq_fc_update(eth_dev, i, ena); + } else { + uint16_t sq_limit; + + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq_limit = + ena ? 
RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) : + sq->nb_sqb_bufs; + txq->nb_sqb_bufs_adj = + sq_limit - + RTE_ALIGN_MUL_CEIL(sq_limit, + (1ULL << txq->sqes_per_sqb_log2)) / + (1ULL << txq->sqes_per_sqb_log2); + txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100; + } +} + +static int +cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true); + rc = cn9k_sso_updt_tx_adptr_data(event_dev); + if (rc < 0) + return rc; + cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +static int +cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + int rc; + + RTE_SET_USED(id); + rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id); + if (rc < 0) + return rc; + cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false); + return cn9k_sso_updt_tx_adptr_data(event_dev); +} + static struct rte_eventdev_ops cn9k_sso_dev_ops = { .dev_infos_get = cn9k_sso_info_get, .dev_configure = cn9k_sso_dev_configure, @@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del, + .timer_adapter_caps_get = cnxk_tim_caps_get, .dump = cnxk_sso_dump, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 9d5d2d0339..24e1be6a97 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -8,6 +8,7 @@ #include <rte_devargs.h> #include <rte_ethdev.h> #include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> #include <rte_kvargs.h> #include <rte_mbuf_pool_ops.h> #include <rte_pci.h> @@ -34,6 +35,7 @@ #define CNXK_SSO_XAQ_CACHE_CNT (0x7) #define CNXK_SSO_XAQ_SLACK (8) #define CNXK_SSO_WQE_SG_PTR (9) +#define CNXK_SSO_SQB_LIMIT (0x180) #define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY) @@ -86,9 +88,12 @@ struct cnxk_sso_evdev { rte_iova_t fc_iova; struct rte_mempool *xaq_pool; uint64_t rx_offloads; + uint64_t tx_offloads; uint64_t adptr_xae_cnt; uint16_t rx_adptr_pool_cnt; uint64_t *rx_adptr_pools; + uint64_t *tx_adptr_data; + uint16_t max_port_id; uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; @@ -115,7 +120,10 @@ struct cn10k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; + /* Tx Fastpath data */ + uint64_t tx_base __rte_cache_aligned; uintptr_t lmt_base; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; /* CN9K HWS ops */ @@ -140,7 +148,9 @@ struct cn9k_sso_hws { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base; + /* Tx Fastpath data */ + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cn9k_sso_hws_state { @@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual { uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[CNXK_SSO_MAX_HWGRP]; - uint64_t base[2]; + /* Tx Fastpath data */ + 
uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct cnxk_sso_hws_cookie { @@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev); +int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); +int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); #endif /* __CNXK_EVENTDEV_H__ */ diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 3b7ecb375a..502da272d8 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, return 0; } + +static int +cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs) +{ + return roc_npa_aura_limit_modify( + sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs)); +} + +static int +cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev, + uint16_t eth_port_id, uint16_t tx_queue_id, + void *txq) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + uint16_t max_port_id = dev->max_port_id; + uint64_t *txq_data = dev->tx_adptr_data; + + if (txq_data == NULL || eth_port_id > max_port_id) { + max_port_id = RTE_MAX(max_port_id, eth_port_id); + txq_data = rte_realloc_socket( + txq_data, + (sizeof(uint64_t) * (max_port_id + 1) * + RTE_MAX_QUEUES_PER_PORT), + RTE_CACHE_LINE_SIZE, event_dev->data->socket_id); + if (txq_data == NULL) + return -ENOMEM; + } + + ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) + txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq; + dev->max_port_id = max_port_id; + dev->tx_adptr_data = txq_data; + return 0; +} + +int +cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct roc_nix_sq *sq; + int i, ret; + void *txq; + + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i); + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, txq); + if (ret < 0) + return ret; + + dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags; + } + + return 0; +} + +int +cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private; + struct roc_nix_sq *sq; + int i, ret; + + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) + cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i); + } else { + sq = &cnxk_eth_dev->sqs[tx_queue_id]; + cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs); + ret = cnxk_sso_updt_tx_queue_data( + event_dev, eth_dev->data->port_id, tx_queue_id, NULL); + if (ret < 0) + return ret; + } + + return 0; +} -- 2.17.1
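
[Editor's illustration, not part of the series: one detail worth calling out in cnxk_sso_updt_tx_queue_data() above is that dev->tx_adptr_data is a single flat allocation of (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT slots, addressed through a cast to a pointer-to-row type so that [eth_port_id][tx_queue_id] indexing works without any table of pointers. A standalone sketch of just that indexing idiom, where MAX_QUEUES_PER_PORT is a stand-in for RTE_MAX_QUEUES_PER_PORT and the stored value is arbitrary:]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_QUEUES_PER_PORT 4 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

int
main(void)
{
	/* One flat allocation covering ports 0..2, sized the same way
	 * as in cnxk_sso_updt_tx_queue_data().
	 */
	uint64_t txq_data[3 * MAX_QUEUES_PER_PORT] = {0};
	uint16_t port = 1, queue = 3;

	/* Casting the flat base pointer to uint64_t(*)[MAX_QUEUES_PER_PORT]
	 * turns it into rows of MAX_QUEUES_PER_PORT entries, so row `port`
	 * starts at offset port * MAX_QUEUES_PER_PORT with no second
	 * allocation or pointer indirection.
	 */
	((uint64_t(*)[MAX_QUEUES_PER_PORT])txq_data)[port][queue] = 0xabcd;

	printf("flat slot %u holds %#" PRIx64 "\n",
	       (unsigned int)(port * MAX_QUEUES_PER_PORT + queue),
	       txq_data[port * MAX_QUEUES_PER_PORT + queue]);
	return 0;
}

[Keeping the table flat is also what lets cn9k/cn10k_sso_updt_tx_adptr_data() copy the whole thing into each worker's trailing tx_adptr_data[] flexible array member with a single memcpy when the event device starts.]
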
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add support for event eth Tx adapter fastpath operations. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++ drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++ drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++ drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++ drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++ .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++ drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++ drivers/event/cnxk/cnxk_worker.h | 27 +++--- drivers/event/cnxk/meson.build | 6 ++ 12 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 8a9b04a3db..e462f770c5 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn10k_sso_hws_enq; event_dev->enqueue_burst = cn10k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst; @@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev) [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; } static void diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index b724083caa..3c90c85009 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -11,6 +11,7 @@ #include "cn10k_ethdev.h" #include "cn10k_rx.h" +#include "cn10k_tx.h" /* SSO Operations */ 
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline const struct cn10k_eth_txq * +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn10k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline uint16_t +cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + const struct cn10k_eth_txq *txq; + struct rte_mbuf *m = ev->mbuf; + uint16_t ref_cnt = m->refcnt; + uintptr_t lmt_addr; + uint16_t lmt_id; + uintptr_t pa; + + lmt_addr = ws->lmt_base; + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + txq = cn10k_sso_hws_xtract_meta(m, txq_data); + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; + } + if (!ev->sched_type) + cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, + ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c new file mode 100644 index 0000000000..f9968ac0d0 --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn10k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..a24fc42e5a --- /dev/null +++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn10k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn10k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn10k_sso_hws_event_tx( \ + ws, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 21f80323d9..a69edff195 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) #undef R }; + /* Tx modes */ + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + + const event_tx_adapter_enqueue + sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = { +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name, + NIX_TX_FASTPATH_MODES +#undef T + }; + event_dev->enqueue = cn9k_sso_hws_enq; event_dev->enqueue_burst = cn9k_sso_hws_enq_burst; event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst; @@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) } } + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + if (dev->dual_ws) { event_dev->enqueue = cn9k_sso_hws_dual_enq; event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst; @@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev) NIX_RX_OFFLOAD_RSS_F)]; } } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] + */ + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads 
& + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } } + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; rte_mb(); } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index c01c00e1da..3f9751211a 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -11,6 +11,7 @@ #include "cn9k_ethdev.h" #include "cn9k_rx.h" +#include "cn9k_tx.h" /* SSO Operations */ @@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES NIX_RX_FASTPATH_MODES #undef R +static __rte_always_inline void +cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq) +{ + while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) + << (txq)->sqes_per_sqb_log2)) + ; +} + +static __rte_always_inline const struct cn9k_eth_txq * +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) +{ + return (const struct cn9k_eth_txq *) + txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags)); + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); +} + +static __rte_always_inline uint16_t +cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + struct rte_mbuf *m = ev->mbuf; + const struct cn9k_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; + + /* Perform header writes before barrier for TSO */ + cn9k_nix_xmit_prepare_tso(m, flags); + /* Lets commit any changes in the packet here in case when + * fast free is set as no further changes will be made to mbuf. + * In case of fast free is not set, both cn9k_nix_prepare_mseg() + * and cn9k_nix_xmit_prepare() has a barrier after refcnt update. 
+ */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); + txq = cn9k_sso_hws_xtract_meta(m, txq_data); + cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } + } else { + if (!CNXK_TT_FROM_EVENT(ev->event)) { + cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + cn9k_sso_txq_fc_wait(txq); + if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) + cn9k_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } + } + + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + + return 1; +} + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events); + +NIX_TX_FASTPATH_MODES +#undef T + #endif diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c new file mode 100644 index 0000000000..92e2981f02 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws_dual *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c new file mode 100644 index 0000000000..dfb574cf95 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. 
+ */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws_dual *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base[!ws->vws], &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c new file mode 100644 index 0000000000..3df649c0c8 --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + struct cn9k_sso_hws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + flags); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c new file mode 100644 index 0000000000..0efe29113e --- /dev/null +++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2021 Marvell. + */ + +#include "cn9k_worker.h" + +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \ + uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \ + void *port, struct rte_event ev[], uint16_t nb_events) \ + { \ + uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct cn9k_sso_hws *ws = port; \ + \ + RTE_SET_USED(nb_events); \ + return cn9k_sso_hws_event_tx( \ + ws->base, &ev[0], cmd, \ + (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \ + ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 4eb46ae162..7891b749df 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -79,21 +79,20 @@ static __rte_always_inline void cnxk_sso_hws_head_wait(uintptr_t tag_op) { #ifdef RTE_ARCH_ARM64 - uint64_t swtp; - - asm volatile(PLT_CPU_FEATURE_PREAMBLE - " ldr %[swtb], [%[swtp_loc]] \n" - " tbz %[swtb], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " tbnz %[swtb], 35, rty%= \n" - "done%=: \n" - : [swtb] "=&r"(swtp) - : [swtp_loc] "r"(tag_op)); + uint64_t tag; + + asm volatile(" ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); #else - /* Wait for the SWTAG/SWTAG_FULL operation */ - while (plt_read64(tag_op) & BIT_ULL(35)) + /* Wait for the HEAD to be set */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) ; #endif } diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build index c5c1c0ee8e..13e0634e86 100644 --- a/drivers/event/cnxk/meson.build +++ b/drivers/event/cnxk/meson.build @@ -17,11 +17,17 @@ sources = files( 'cn9k_worker_dual_deq.c', 'cn9k_worker_dual_deq_burst.c', 
'cn9k_worker_dual_deq_tmo.c', + 'cn9k_worker_tx_enq.c', + 'cn9k_worker_tx_enq_seg.c', + 'cn9k_worker_dual_tx_enq.c', + 'cn9k_worker_dual_tx_enq_seg.c', 'cn10k_eventdev.c', 'cn10k_worker.c', 'cn10k_worker_deq.c', 'cn10k_worker_deq_burst.c', 'cn10k_worker_deq_tmo.c', + 'cn10k_worker_tx_enq.c', + 'cn10k_worker_tx_enq_seg.c', 'cnxk_eventdev.c', 'cnxk_eventdev_adptr.c', 'cnxk_eventdev_selftest.c', -- 2.17.1
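A note on the code-generation scheme this patch relies on: every T() entry of NIX_TX_FASTPATH_MODES stamps out one enqueue function whose offload flags are compile-time constants, and cn9k_sso_fp_fns_set()/cn10k_sso_fp_fns_set() pick one at configure time from a table indexed by !!(tx_offloads & FLAG). A reduced two-flag sketch of the same X-macro pattern; all DEMO_*/demo_* names are hypothetical:

#include <stdint.h>

#define DEMO_F0 (1U << 0) /* stand-in for e.g. NIX_TX_OFFLOAD_L3_L4_CSUM_F */
#define DEMO_F1 (1U << 1) /* stand-in for e.g. NIX_TX_OFFLOAD_VLAN_QINQ_F */

static uint16_t
demo_do_send(void *pkt, const uint32_t flags)
{
	/* flags is a compile-time constant in every specialization, so
	 * the compiler deletes the untaken offload branches.
	 */
	(void)pkt;
	return (flags & DEMO_F0 ? 1 : 0) + (flags & DEMO_F1 ? 1 : 0);
}

/* One T() entry per flag combination, as NIX_TX_FASTPATH_MODES does
 * for six flags (64 combinations).
 */
#define DEMO_FASTPATH_MODES                                                   \
	T(none,      0, 0, 0)                                                 \
	T(csum,      0, 1, DEMO_F0)                                           \
	T(vlan,      1, 0, DEMO_F1)                                           \
	T(vlan_csum, 1, 1, DEMO_F1 | DEMO_F0)

#define T(name, f1, f0, flags)                                                \
	static uint16_t demo_send_##name(void *pkt)                           \
	{                                                                     \
		return demo_do_send(pkt, (flags));                            \
	}
DEMO_FASTPATH_MODES
#undef T

/* Configure-time dispatch, mirroring cn10k_sso_fp_fns_set(). */
static uint16_t
demo_send(void *pkt, uint32_t offloads)
{
	static uint16_t (*const fns[2][2])(void *) = {
#define T(name, f1, f0, flags) [f1][f0] = demo_send_##name,
		DEMO_FASTPATH_MODES
#undef T
	};

	return fns[!!(offloads & DEMO_F1)][!!(offloads & DEMO_F0)](pkt);
}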
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add event vector support for cnxk event Rx adapter, add control path APIs to get vector limits and ability to configure event vectorization on a given Rx queue. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++- drivers/event/cnxk/cnxk_eventdev.h | 2 + drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++ drivers/net/cnxk/cnxk_ethdev.h | 2 +- 4 files changed, 133 insertions(+), 2 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e462f770c5..e85fa4785d 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, else *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | - RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID; + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | + RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR; return 0; } @@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id); } +static int +cn10k_sso_rx_adapter_vector_limits( + const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, + struct rte_event_eth_rx_adapter_vector_limits *limits) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (ret) + return -ENOTSUP; + + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + limits->log2_sz = true; + limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2; + limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2; + limits->min_timeout_ns = + (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100; + limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns; + + return 0; +} + +static int +cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev, + uint16_t port_id, uint16_t rq_id, uint16_t sz, + uint64_t tmo_ns, struct rte_mempool *vmp) +{ + struct roc_nix_rq *rq; + + rq = &cnxk_eth_dev->rqs[rq_id]; + + if (!rq->sso_ena) + return -EINVAL; + if (rq->flow_tag_width == 0) + return -EINVAL; + + rq->vwqe_ena = 1; + rq->vwqe_first_skip = 0; + rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id); + rq->vwqe_max_sz_exp = rte_log2_u32(sz); + rq->vwqe_wait_tmo = + tmo_ns / + ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100); + rq->tag_mask = (port_id & 0xF) << 20; + rq->tag_mask |= + (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4)) + << 24; + + return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0); +} + +static int +cn10k_sso_rx_adapter_vector_config( + const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_event_vector_config *config) +{ + struct cnxk_eth_dev *cnxk_eth_dev; + struct cnxk_sso_evdev *dev; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8); + if (rc) + return -EINVAL; + + dev = cnxk_sso_pmd_priv(event_dev); + cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev); + if (rx_queue_id < 0) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, i, + config->vector_sz, 
config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + } else { + + cnxk_sso_updt_xae_cnt(dev, config->vector_mp, + RTE_EVENT_TYPE_ETHDEV_VECTOR); + rc = cnxk_sso_xae_reconfigure( + (struct rte_eventdev *)(uintptr_t)event_dev); + rc = cnxk_sso_rx_adapter_vwqe_enable( + cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id, + config->vector_sz, config->vector_timeout_ns, + config->vector_mp); + if (rc) + return -EINVAL; + } + + return 0; +} + static int cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev, uint32_t *caps) @@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = { .eth_rx_adapter_start = cnxk_sso_rx_adapter_start, .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop, + .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits, + .eth_rx_adapter_event_vector_config = + cn10k_sso_rx_adapter_vector_config, + .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get, .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add, .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del, diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h index 24e1be6a97..fc49b88d6f 100644 --- a/drivers/event/cnxk/cnxk_eventdev.h +++ b/drivers/event/cnxk/cnxk_eventdev.h @@ -97,6 +97,8 @@ struct cnxk_sso_evdev { uint16_t tim_adptr_ring_cnt; uint16_t *timer_adptr_rings; uint64_t *timer_adptr_sz; + uint16_t vec_pool_cnt; + uint64_t *vec_pools; /* Dev args */ uint32_t xae_cnt; uint8_t qos_queue_cnt; diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c index 502da272d8..baf2f2aa6b 100644 --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c @@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data, dev->adptr_xae_cnt += rxq->qconf.mp->size; break; } + case RTE_EVENT_TYPE_ETHDEV_VECTOR: { + struct rte_mempool *mp = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->vec_pool_cnt; i++) { + if ((uint64_t)mp == dev->vec_pools[i]) + return; + } + + dev->vec_pool_cnt++; + old_ptr = dev->vec_pools; + dev->vec_pools = + rte_realloc(dev->vec_pools, + sizeof(uint64_t) * dev->vec_pool_cnt, 0); + if (dev->vec_pools == NULL) { + dev->adptr_xae_cnt += mp->size; + dev->vec_pools = old_ptr; + dev->vec_pool_cnt--; + return; + } + dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp; + + dev->adptr_xae_cnt += mp->size; + break; + } case RTE_EVENT_TYPE_TIMER: { struct cnxk_tim_ring *timr = data; uint16_t *old_ring_ptr; diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index 4eead03905..2528b3cdaa 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp { } __plt_cache_aligned; static inline struct cnxk_eth_dev * -cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev) +cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev) { return eth_dev->data->dev_private; } -- 2.17.1
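From the application side, the new ops surface through the experimental 21.08 Rx adapter API. A usage sketch, assuming the public entry points are rte_event_eth_rx_adapter_vector_limits_get() and rte_event_eth_rx_adapter_queue_event_vector_config(), and that the queue was added with RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR set in rx_queue_flags; demo_* is illustrative:

#include <string.h>

#include <rte_event_eth_rx_adapter.h>

static int
demo_enable_rx_vectors(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port,
		       struct rte_mempool *vec_mp)
{
	struct rte_event_eth_rx_adapter_event_vector_config cfg;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	int rc;

	rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
							&limits);
	if (rc)
		return rc;

	memset(&cfg, 0, sizeof(cfg));
	/* cn10k reports log2_sz = true: sizes must be powers of two in
	 * [min_sz, max_sz]; the timeout resolves to multiples of the
	 * VWQE interval within [min_timeout_ns, max_timeout_ns].
	 */
	cfg.vector_sz = RTE_MAX(limits.min_sz, RTE_MIN(256U, limits.max_sz));
	cfg.vector_timeout_ns = limits.min_timeout_ns;
	cfg.vector_mp = vec_mp;

	/* rx_queue_id = -1 applies the config to every Rx queue. */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		adptr_id, eth_port, -1, &cfg);
}

The driver then programs each RQ through cnxk_sso_rx_adapter_vwqe_enable() above, deriving rq->vwqe_wait_tmo from the same VWQE interval that the reported limits are based on.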
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Rx event vector fastpath to convert HW defined metadata into rte_mbuf and rte_event_vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/cnxk/cn10k_worker.h | 56 ++++++++ drivers/net/cnxk/cn10k_rx.h | 200 ++++++++++++++++----------- drivers/net/cnxk/cn10k_rx_vec.c | 2 +- drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +- 4 files changed, 178 insertions(+), 85 deletions(-) diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 3c90c85009..7a48a6b17d 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -5,6 +5,8 @@ #ifndef __CN10K_WORKER_H__ #define __CN10K_WORKER_H__ +#include <rte_vect.h> + #include "cnxk_ethdev.h" #include "cnxk_eventdev.h" #include "cnxk_worker.h" @@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, mbuf_init | ((uint64_t)port_id) << 48, flags); } +static __rte_always_inline void +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, + void *lookup_mem, void *tstamp) +{ + uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_event_vector *vec; + uint16_t nb_mbufs, non_vec; + uint64_t **wqe; + + mbuf_init |= ((uint64_t)port_id) << 48; + vec = (struct rte_event_vector *)vwqe; + wqe = vec->u64s; + + nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP); + nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs, + flags | NIX_RX_VWQE_F, lookup_mem, + tstamp); + wqe += nb_mbufs; + non_vec = vec->nb_elem - nb_mbufs; + + while (non_vec) { + struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0]; + struct rte_mbuf *mbuf; + uint64_t tstamp_ptr; + + mbuf = (struct rte_mbuf *)((char *)cqe - + sizeof(struct rte_mbuf)); + cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, + mbuf_init, flags); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) + + CNXK_SSO_WQE_SG_PTR); + cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, + flags & NIX_RX_OFFLOAD_TSTAMP_F, + flags & NIX_RX_MULTI_SEG_F, + (uint64_t *)tstamp_ptr); + wqe[0] = (uint64_t *)mbuf; + non_vec--; + wqe++; + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, const uint32_t flags, void *lookup_mem) @@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, flags & NIX_RX_MULTI_SEG_F, (uint64_t *)tstamp_ptr); gw.u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0xFFFF) << 48) | + ((uint64_t)port << 32); + *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, + ws->tstamp); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index d9572b19e7..4c5288b2cc 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -21,6 +21,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_VWQE_F BIT(14) #define NIX_RX_MULTI_SEG_F BIT(15) #define CNXK_NIX_CQ_ENTRY_SZ 128 @@ -28,6 +29,11 @@ #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x)) #define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ) +#define CQE_PTR_OFF(b, i, o, f) \ + (((f) & 
NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) + union mbuf_initializer { struct { uint16_t data_off; @@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) } static __rte_always_inline uint16_t -cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) +cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, + const uint16_t flags, void *lookup_mem, + struct cnxk_timesync_info *tstamp) { - struct cn10k_eth_rxq *rxq = rx_queue; - uint16_t packets = 0; + struct cn10k_eth_rxq *rxq = args; + const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ? + *(uint64_t *)args : + rxq->mbuf_initializer; + const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? + vdupq_n_u64(0x80ULL) : + vdupq_n_u64(rxq->data_off); + const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; + const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; + const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc; uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23; - const uint64_t mbuf_initializer = rxq->mbuf_initializer; - const uint64x2_t data_off = vdupq_n_u64(rxq->data_off); uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3; uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer); uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; - const uint16_t *lookup_mem = rxq->lookup_mem; - const uint32_t qmask = rxq->qmask; - const uint64_t wdata = rxq->wdata; - const uintptr_t desc = rxq->desc; uint8x16_t f0, f1, f2, f3; - uint32_t head = rxq->head; + uint16_t packets = 0; uint16_t pkts_left; - - pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); - pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); - - /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + uint32_t head; + uintptr_t cq0; + + if (!(flags & NIX_RX_VWQE_F)) { + lookup_mem = rxq->lookup_mem; + head = rxq->head; + + pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); + /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + tstamp = rxq->tstamp; + } else { + RTE_SET_USED(head); + } while (packets < pkts) { - /* Exit loop if head is about to wrap and become unaligned */ - if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < - NIX_DESCS_PER_LOOP) { - pkts_left += (pkts - packets); - break; - } + if (!(flags & NIX_RX_VWQE_F)) { + /* Exit loop if head is about to wrap and become + * unaligned. 
+ */ + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < + NIX_DESCS_PER_LOOP) { + pkts_left += (pkts - packets); + break; + } - const uintptr_t cq0 = desc + CQE_SZ(head); + cq0 = desc + CQE_SZ(head); + } else { + cq0 = (uintptr_t)&mbufs[packets]; + } /* Prefetch N desc ahead */ - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10))); - rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11))); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags)); /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64)); - cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64)); - cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64)); - cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64)); - - /* Extract mbuf from NIX_RX_SG_S */ - mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); - mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); - mbuf01 = vqsubq_u64(mbuf01, data_off); - mbuf23 = vqsubq_u64(mbuf23, data_off); + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + + if (!(flags & NIX_RX_VWQE_F)) { + /* Extract mbuf from NIX_RX_SG_S */ + mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); + mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); + mbuf01 = vqsubq_u64(mbuf01, data_off); + mbuf23 = vqsubq_u64(mbuf23, data_off); + } else { + mbuf01 = + vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off); + mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)), + data_off); + } /* Move mbufs to scalar registers for future use */ mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0); @@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, f3 = vqtbl1q_u8(cq3_w8, shuf_msk); /* Load CQE word0 and word 1 */ - uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0]; - uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1]; - uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0]; - uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1]; - uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0]; - uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1]; - uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0]; - uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1]; + const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); + const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags); + const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); + const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags); + const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); + const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags); + const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); + const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags); if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) { ol_flags0 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0, - mbuf0); + *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags), + ol_flags0, mbuf0); ol_flags1 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1, - mbuf1); + *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags), + ol_flags1, mbuf1); ol_flags2 = 
nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2, - mbuf2); + *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags), + ol_flags2, mbuf2); ol_flags3 = nix_update_match_id( - *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3, - mbuf3); + *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags), + ol_flags3, mbuf3); } if (flags & NIX_RX_OFFLOAD_TSTAMP_F) { @@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, RTE_PTYPE_L2_ETHER_TIMESYNC}; const uint64_t ts_olf = PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST | - rxq->tstamp->rx_tstamp_dynflag; + tstamp->rx_tstamp_dynflag; const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8}; uint64x2_t ts01, ts23, mask; uint64_t ts[4]; @@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, ts[3] = vgetq_lane_u64(ts23, 1); /* Store timestamp into dynfield. */ - *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) = - ts[0]; - *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) = - ts[1]; - *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) = - ts[2]; - *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) = - ts[3]; + *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0]; + *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1]; + *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2]; + *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3]; /* Generate ptype mask to filter L2 ether timesync */ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0)); @@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, /* Update Rxq timestamp with the latest * timestamp. */ - rxq->tstamp->rx_ready = 1; - rxq->tstamp->rx_tstamp = - ts[31 - __builtin_clz(res)]; + tstamp->rx_ready = 1; + tstamp->rx_tstamp = ts[31 - __builtin_clz(res)]; } } @@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); - vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
*/ nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(0) + 8), mbuf0, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 0, 8, flags)), + mbuf0, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(1) + 8), mbuf1, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 1, 8, flags)), + mbuf1, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(2) + 8), mbuf2, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 2, 8, flags)), + mbuf2, mbuf_initializer, flags); nix_cqe_xtract_mseg((union nix_rx_parse_u *) - (cq0 + CQE_SZ(3) + 8), mbuf3, - mbuf_initializer, flags); + (CQE_PTR_OFF(cq0, 3, 8, flags)), + mbuf3, mbuf_initializer, flags); } else { /* Update that no more segments */ mbuf0->next = NULL; @@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, __mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1); __mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1); - /* Advance head pointer and packets */ - head += NIX_DESCS_PER_LOOP; - head &= qmask; packets += NIX_DESCS_PER_LOOP; + + if (!(flags & NIX_RX_VWQE_F)) { + /* Advance head pointer and packets */ + head += NIX_DESCS_PER_LOOP; + head &= qmask; + } } + if (flags & NIX_RX_VWQE_F) + return packets; + rxq->head = head; rxq->available -= packets; @@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, plt_write64((rxq->wdata | packets), rxq->cq_door); if (unlikely(pkts_left)) - packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets], - pkts_left, flags); + packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left, + flags); return packets; } @@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, static inline uint16_t cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t pkts, const uint16_t flags) + uint16_t pkts, const uint16_t flags, + void *lookup_mem, void *tstamp) { + RTE_SET_USED(lookup_mem); RTE_SET_USED(rx_queue); RTE_SET_USED(rx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(flags); + RTE_SET_USED(tstamp); return 0; } diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c index 93528a44f9..166735ad59 100644 --- a/drivers/net/cnxk/cn10k_rx_vec.c +++ b/drivers/net/cnxk/cn10k_rx_vec.c @@ -12,7 +12,7 @@ uint16_t pkts) \ { \ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags)); \ + (flags), NULL, NULL); \ } NIX_RX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c index 04d1e46c82..1f44dddddd 100644 --- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c @@ -9,8 +9,9 @@ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \ { \ - return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \ - (flags) | NIX_RX_MULTI_SEG_F); \ + return cn10k_nix_recv_pkts_vector( \ + rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \ + NULL, NULL); \ } NIX_RX_FASTPATH_MODES -- 2.17.1
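On the consumer side, cn10k_process_vwqe() rewrites the VWQE in place into a struct rte_event_vector whose mbufs[] array aliases the original word array. A sketch of a worker draining such events through the generic eventdev API; demo_* is illustrative:

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
demo_worker_loop(uint8_t dev_id, uint8_t ev_port)
{
	struct rte_event ev;
	uint16_t i;

	while (rte_event_dequeue_burst(dev_id, ev_port, &ev, 1, 0)) {
		if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			/* mbufs[] was filled in place from the VWQE. */
			for (i = 0; i < ev.vec->nb_elem; i++)
				rte_pktmbuf_free(ev.vec->mbufs[i]);
			/* The vector object comes from the mempool given
			 * in the vector config and must be returned.
			 */
			rte_mempool_put(rte_mempool_from_obj(ev.vec),
					ev.vec);
		} else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
			rte_pktmbuf_free(ev.mbuf);
		}
	}
}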
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add Tx event vector fastpath, integrate event vector Tx routine into Tx burst. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/common/cnxk/roc_sso.h | 23 ++++++ drivers/event/cnxk/cn10k_eventdev.c | 3 +- drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++-- drivers/event/cnxk/cn9k_worker.h | 4 +- drivers/event/cnxk/cnxk_worker.h | 22 ------ drivers/net/cnxk/cn10k_tx.c | 2 +- drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----- drivers/net/cnxk/cn10k_tx_mseg.c | 3 +- drivers/net/cnxk/cn10k_tx_vec.c | 2 +- drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +- 10 files changed, 165 insertions(+), 52 deletions(-) diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h index a6030e7d8a..b28f6089cc 100644 --- a/drivers/common/cnxk/roc_sso.h +++ b/drivers/common/cnxk/roc_sso.h @@ -44,6 +44,29 @@ struct roc_sso { uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; } __plt_cache_aligned; +static __plt_always_inline void +roc_sso_hws_head_wait(uintptr_t tag_op) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile(PLT_CPU_FEATURE_PREAMBLE + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r"(tag) + : [tag_op] "r"(tag_op)); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (!(plt_read64(tag_op) & BIT_ULL(35))) + ; +#endif +} + /* SSO device initialization */ int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso); int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso); diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index e85fa4785d..6f37c5bd23 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, if (ret) *caps = 0; else - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR; return 0; } diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 7a48a6b17d..9cc0992063 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port, NIX_RX_FASTPATH_MODES #undef R -static __rte_always_inline const struct cn10k_eth_txq * +static __rte_always_inline struct cn10k_eth_txq * cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) { - return (const struct cn10k_eth_txq *) + return (struct cn10k_eth_txq *) txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)]; } +static __rte_always_inline void +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs, + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr, + uint8_t sched_type, uintptr_t base, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], + const uint32_t flags) +{ + uint16_t port[4], queue[4]; + struct cn10k_eth_txq *txq; + uint16_t i, j; + uintptr_t pa; + + for (i = 0; i < nb_mbufs; i += 4) { + port[0] = mbufs[i]->port; + port[1] = mbufs[i + 1]->port; + port[2] = mbufs[i + 2]->port; + port[3] = mbufs[i + 3]->port; + + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]); + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]); + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]); + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]); + + if (((port[0] ^ 
port[1]) & (port[2] ^ port[3])) || + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) { + + for (j = 0; j < 4; j++) { + struct rte_mbuf *m = mbufs[i + j]; + + txq = (struct cn10k_eth_txq *) + txq_data[port[j]][queue[j]]; + cn10k_nix_tx_skeleton(txq, cmd, flags); + /* Perform header writes before barrier + * for TSO + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(m, flags); + + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, + txq->lso_tun_fmt); + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = + cn10k_nix_prepare_mseg( + m, (uint64_t *)lmt_addr, + flags); + pa = txq->io_addr | ((segdw - 1) << 4); + } else { + pa = txq->io_addr | + (cn10k_nix_tx_ext_subs(flags) + 1) + << 4; + } + if (!sched_type) + roc_sso_hws_head_wait(base + + SSOW_LF_GWS_TAG); + + roc_lmt_submit_steorl(lmt_id, pa); + } + } else { + txq = (struct cn10k_eth_txq *) + txq_data[port[0]][queue[0]]; + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base + + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } + } +} + static __rte_always_inline uint16_t cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - const struct cn10k_eth_txq *txq; - struct rte_mbuf *m = ev->mbuf; - uint16_t ref_cnt = m->refcnt; + struct cn10k_eth_txq *txq; + struct rte_mbuf *m; uintptr_t lmt_addr; + uint16_t ref_cnt; uint16_t lmt_id; uintptr_t pa; lmt_addr = ws->lmt_base; ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) { + struct rte_mbuf **mbufs = ev->vec->mbufs; + uint64_t meta = *(uint64_t *)ev->vec; + + if (meta & BIT(31)) { + txq = (struct cn10k_eth_txq *) + txq_data[meta >> 32][meta >> 48]; + + cn10k_nix_xmit_pkts_vector( + txq, mbufs, meta & 0xFFFF, cmd, + ws->tx_base + SSOW_LF_GWS_TAG, + flags | NIX_TX_VWQE_F); + } else { + cn10k_sso_vwqe_split_tx( + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr, + ev->sched_type, ws->tx_base, txq_data, flags); + } + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec); + return (meta & 0xFFFF); + } + + m = ev->mbuf; + ref_cnt = m->refcnt; txq = cn10k_sso_hws_xtract_meta(m, txq_data); cn10k_nix_tx_skeleton(txq, cmd, flags); /* Perform header writes before barrier for TSO */ @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4; } if (!ev->sched_type) - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG); roc_lmt_submit_steorl(lmt_id, pa); @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev, cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG, ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); - return 1; } diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 3f9751211a..cc1e141957 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags); if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, } 
else { if (!CNXK_TT_FROM_EVENT(ev->event)) { cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); cn9k_sso_txq_fc_wait(txq); if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) cn9k_nix_xmit_one(cmd, txq->lmt_addr, diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h index 7891b749df..9f9ceab8a1 100644 --- a/drivers/event/cnxk/cnxk_worker.h +++ b/drivers/event/cnxk/cnxk_worker.h @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) #endif } -static __rte_always_inline void -cnxk_sso_hws_head_wait(uintptr_t tag_op) -{ -#ifdef RTE_ARCH_ARM64 - uint64_t tag; - - asm volatile(" ldr %[tag], [%[tag_op]] \n" - " tbnz %[tag], 35, done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[tag], [%[tag_op]] \n" - " tbz %[tag], 35, rty%= \n" - "done%=: \n" - : [tag] "=&r"(tag) - : [tag_op] "r"(tag_op)); -#else - /* Wait for the HEAD to be set */ - while (!(plt_read64(tag_op) & BIT_ULL(35))) - ; -#endif -} - #endif diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 1f30bab59a..0e1276c60b 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ - flags); \ + 0, flags); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index eb148b8e77..f75cae07ae 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -18,6 +18,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_TX_VWQE_F BIT(14) #define NIX_TX_MULTI_SEG_F BIT(15) #define NIX_TX_NEED_SEND_HDR_W1 \ @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) static __rte_always_inline uint16_t cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, - uint64_t *cmd, const uint16_t flags) + uint64_t *cmd, uintptr_t base, const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; const rte_iova_t io_addr = txq->io_addr; @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, uint64_t lso_tun_fmt; uint64_t data; - NIX_XMIT_FC_OR_RETURN(txq, pkts); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } /* Get cmd skeleton */ cn10k_nix_tx_skeleton(txq, cmd, flags); - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; - if (flags & NIX_TX_OFFLOAD_TSO_F) lso_tun_fmt = txq->lso_tun_fmt; @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (burst > 16) { data = cn10k_nix_tx_steor_data(flags); @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { struct cn10k_eth_txq *txq = tx_queue; uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, shft += 3; } + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + data0 = 
(uint64_t)data128; data1 = (uint64_t)(data128 >> 64); /* Make data0 similar to data1 */ @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0, static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, uint64_t data[2]; } wd; - NIX_XMIT_FC_OR_RETURN(txq, pkts); - - scalar = pkts & (NIX_DESCS_PER_LOOP - 1); - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + if (!(flags & NIX_TX_VWQE_F)) { + NIX_XMIT_FC_OR_RETURN(txq, pkts); + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + } else { + scalar = pkts & (NIX_DESCS_PER_LOOP - 1); + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); + } - /* Reduce the cached count */ - txq->fc_cache_pkts -= pkts; /* Perform header writes before barrier for TSO */ if (flags & NIX_TX_OFFLOAD_TSO_F) { for (i = 0; i < pkts; i++) @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (flags & NIX_TX_MULTI_SEG_F) wd.data[0] >>= 16; + if (flags & NIX_TX_VWQE_F) + roc_sso_hws_head_wait(base); + /* Trigger LMTST */ if (lnum > 16) { if (!(flags & NIX_TX_MULTI_SEG_F)) @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, if (unlikely(scalar)) { if (flags & NIX_TX_MULTI_SEG_F) pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, - scalar, cmd, flags); + scalar, cmd, base, + flags); else pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, - cmd, flags); + cmd, base, flags); } return pkts; @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, #else static __rte_always_inline uint16_t cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t pkts, uint64_t *cmd, const uint16_t flags) + uint16_t pkts, uint64_t *cmd, uintptr_t base, + const uint16_t flags) { RTE_SET_USED(tx_queue); RTE_SET_USED(tx_pkts); RTE_SET_USED(pkts); RTE_SET_USED(cmd); RTE_SET_USED(flags); + RTE_SET_USED(base); return 0; } #endif diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c index 33f6754722..4ea4c8a4e5 100644 --- a/drivers/net/cnxk/cn10k_tx_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_mseg.c @@ -18,7 +18,8 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + 0, (flags) \ + | NIX_TX_MULTI_SEG_F); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 34e3737501..a0350496ab 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -18,7 +18,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ - (flags)); \ + 0, (flags)); \ } NIX_TX_FASTPATH_MODES diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c index 1fad81dbad..7f98f79b97 100644 --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c @@ -16,7 +16,7 @@ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ return 0; \ 
return cn10k_nix_xmit_pkts_vector( \ - tx_queue, tx_pkts, pkts, cmd, \ + tx_queue, tx_pkts, pkts, cmd, 0, \ (flags) | NIX_TX_MULTI_SEG_F); \ } -- 2.17.1
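Application-side use of the new Tx capability: a vector can be handed straight to rte_event_eth_tx_adapter_enqueue(). When the vector's attr_valid/port/queue attributes are set, the driver takes the single-txq vectorized path in cn10k_sso_hws_event_tx() above; otherwise cn10k_sso_vwqe_split_tx() resolves the destination per mbuf. A sketch, assuming the 21.08 rte_event_vector attribute fields; demo_* is illustrative:

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

static inline uint16_t
demo_tx_vector(uint8_t dev_id, uint8_t ev_port, struct rte_event_vector *vec,
	       uint16_t eth_port, uint16_t eth_queue)
{
	struct rte_event ev = {0};

	/* All elements share one destination; this selects the
	 * vectorized single-txq path in the driver.
	 */
	vec->attr_valid = 1;
	vec->port = eth_port;
	vec->queue = eth_queue;

	ev.event_type = RTE_EVENT_TYPE_CPU_VECTOR;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.vec = vec;

	return rte_event_eth_tx_adapter_enqueue(dev_id, ev_port, &ev, 1, 0);
}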
On Wed, Jul 14, 2021 at 2:33 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add Tx event vector fastpath, integrate event vector Tx routine
> into Tx burst.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Series Acked-by: Jerin Jacob <jerinj@marvell.com>

Series v9 Applied to dpdk-next-net-eventdev/for-main. Thanks
b/drivers/net/cnxk/cn10k_tx_mseg.c > index 33f6754722..4ea4c8a4e5 100644 > --- a/drivers/net/cnxk/cn10k_tx_mseg.c > +++ b/drivers/net/cnxk/cn10k_tx_mseg.c > @@ -18,7 +18,8 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ > - (flags) | NIX_TX_MULTI_SEG_F); \ > + 0, (flags) \ > + | NIX_TX_MULTI_SEG_F); \ > } > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c > index 34e3737501..a0350496ab 100644 > --- a/drivers/net/cnxk/cn10k_tx_vec.c > +++ b/drivers/net/cnxk/cn10k_tx_vec.c > @@ -18,7 +18,7 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ > - (flags)); \ > + 0, (flags)); \ > } > > NIX_TX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > index 1fad81dbad..7f98f79b97 100644 > --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c > +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c > @@ -16,7 +16,7 @@ > !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ > return 0; \ > return cn10k_nix_xmit_pkts_vector( \ > - tx_queue, tx_pkts, pkts, cmd, \ > + tx_queue, tx_pkts, pkts, cmd, 0, \ > (flags) | NIX_TX_MULTI_SEG_F); \ > } > > -- > 2.17.1 >
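A note on the Tx vector fastpath in the patch above: the first 64 bits of the
event vector carry the element count, an attribute-valid flag and, when that
flag is set, a single destination port/queue shared by all mbufs, which is
what lets cn10k_sso_hws_event_tx() skip per-mbuf txq lookups and call
cn10k_nix_xmit_pkts_vector() directly. Below is a minimal decode sketch; the
field layout (nb_elem in bits 15:0, attr-valid in bit 31, port in bits 47:32,
queue in bits 63:48) is inferred from the shifts and masks in the patch and
is an assumption, not the authoritative rte_event_vector definition.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical decode of the vector metadata word read in
 * cn10k_sso_hws_event_tx(); positions inferred from the patch
 * (meta & 0xFFFF, meta & BIT(31), meta >> 32, meta >> 48). */
static void
vwqe_meta_decode(uint64_t meta)
{
	uint16_t nb_elem = meta & 0xFFFF;       /* number of mbufs */
	int attr_valid = !!(meta & (1U << 31)); /* one shared destination? */

	if (attr_valid) {
		uint16_t port = (meta >> 32) & 0xFFFF;
		uint16_t queue = (meta >> 48) & 0xFFFF;

		/* One txq for the whole vector: vector Tx path. */
		printf("%u mbufs -> port %u queue %u\n", nb_elem, port, queue);
	} else {
		/* Mixed destinations: split-Tx path, which walks the mbufs
		 * four at a time and checks port/queue uniformity. */
		printf("%u mbufs, per-mbuf destination lookup\n", nb_elem);
	}
}

int
main(void)
{
	/* Example: 32 mbufs, attributes valid, port 1, queue 3. */
	vwqe_meta_decode(32ULL | (1ULL << 31) | (1ULL << 32) | (3ULL << 48));
	return 0;
}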
On Wed, Jul 14, 2021 at 11:02 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add support for event eth Rx adapter fastpath operations.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
This patch triggers a build issue when cross-compiling for arm64 on my
system with an 8.3 toolchain from Linaro.
I ended up upgrading my toolchain (which solved the issue), but some
users might hit this, so posting for info:
[2813/2834] Compiling C object
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o
FAILED: drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o
aarch64-linux-gnu-gcc -Idrivers/libtmp_rte_event_cnxk.a.p -Idrivers
-I../../dpdk/drivers -Idrivers/event/cnxk
-I../../dpdk/drivers/event/cnxk -Ilib/eventdev
-I../../dpdk/lib/eventdev -I. -I../../dpdk -Iconfig
-I../../dpdk/config -Ilib/eal/include -I../../dpdk/lib/eal/include
-Ilib/eal/linux/include -I../../dpdk/lib/eal/linux/include
-Ilib/eal/arm/include -I../../dpdk/lib/eal/arm/include
-Ilib/eal/common -I../../dpdk/lib/eal/common -Ilib/eal
-I../../dpdk/lib/eal -Ilib/kvargs -I../../dpdk/lib/kvargs
-Ilib/metrics -I../../dpdk/lib/metrics -Ilib/telemetry
-I../../dpdk/lib/telemetry -Ilib/ring -I../../dpdk/lib/ring
-Ilib/ethdev -I../../dpdk/lib/ethdev -Ilib/net -I../../dpdk/lib/net
-Ilib/mbuf -I../../dpdk/lib/mbuf -Ilib/mempool
-I../../dpdk/lib/mempool -Ilib/meter -I../../dpdk/lib/meter -Ilib/hash
-I../../dpdk/lib/hash -Ilib/rcu -I../../dpdk/lib/rcu -Ilib/timer
-I../../dpdk/lib/timer -Ilib/cryptodev -I../../dpdk/lib/cryptodev
-Idrivers/bus/pci -I../../dpdk/drivers/bus/pci
-I../../dpdk/drivers/bus/pci/linux -Ilib/pci -I../../dpdk/lib/pci
-Idrivers/common/cnxk -I../../dpdk/drivers/common/cnxk -Ilib/security
-I../../dpdk/lib/security -Idrivers/net/cnxk
-I../../dpdk/drivers/net/cnxk -Idrivers/bus/vdev
-I../../dpdk/drivers/bus/vdev -Idrivers/mempool/cnxk
-I../../dpdk/drivers/mempool/cnxk -fdiagnostics-color=always -pipe
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -O2 -g -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declarations
-Wmissing-prototypes -Wnested-externs -Wold-style-definition
-Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef
-Wwrite-strings -Wno-packed-not-aligned
-Wno-missing-field-initializers -D_GNU_SOURCE -fPIC -march=armv8-a+crc
-DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -Wno-format-truncation
-flax-vector-conversions -Wno-strict-aliasing
-DRTE_LOG_DEFAULT_LOGTYPE=pmd.event.cnxk -MD -MQ
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o -MF
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o.d -o
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o -c
../../dpdk/drivers/event/cnxk/cn10k_worker_deq.c
{standard input}: Assembler messages:
{standard input}:1392: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:10473: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:15726: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:19146: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x4]'
{standard input}:28825: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:30845: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:34301: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:40152: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x2]'
{standard input}:44998: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x2]'
{standard input}:52457: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:58407: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:62121: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x4]'
{standard input}:64121: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:67572: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:69764: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:88814: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:92747: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x4]'
{standard input}:95490: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:99628: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:102765: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:115148: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:122005: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:140039: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:147676: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:154953: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:159334: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:162769: Error: reg pair must start from even reg at
operand 1 -- `caspl x17,x18,x17,x18,[x2]'
{standard input}:167453: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x2]'
{standard input}:171071: Error: reg pair must start from even reg at
operand 1 -- `caspl x17,x18,x17,x18,[x2]'
{standard input}:179105: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:186966: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:191653: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:195360: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x3]'
{standard input}:204312: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x3]'
{standard input}:209537: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:222601: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:228793: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:234946: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:240956: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:258235: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:264084: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:270355: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:272988: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:277045: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:279878: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:297340: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:304594: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:315184: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:322794: Error: reg pair must start from even reg at
operand 1 -- `caspl x19,x20,x19,x20,[x2]'
{standard input}:327357: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:335754: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x2]'
{standard input}:361049: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:364869: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x2]'
{standard input}:370062: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:374066: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x2]'
{standard input}:382804: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:391016: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:441361: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
--
David Marchand
>-----Original Message-----
>From: David Marchand <david.marchand@redhat.com>
>Sent: Tuesday, July 20, 2021 4:33 PM
>To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
>Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Shijith Thotton
><sthotton@marvell.com>; dev <dev@dpdk.org>
>Subject: [EXT] Re: [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx
>adapter fastpath ops
>
>This patch triggers a build issue when cross-compiling for arm64 on my
>system with an 8.3 toolchain from Linaro.
>I ended up upgrading my toolchain (which solved the issue), but some
>users might hit this, so posting for info:

This is a known compiler bug:
https://bugs.dpdk.org/show_bug.cgi?id=697

>[quoted build log and assembler errors snipped; identical to the log
>above]
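For context, caspl is the release-ordered 128-bit compare-and-swap pair
(CASP) available on arm64 with LSE; the ISA requires each data pair to start
at an even-numbered register, and the affected GCC releases could allocate an
odd pair for inline-asm operands, producing the assembler errors quoted
above. A rough sketch of the pattern, with the pair pinned to x0/x1 shown as
one conceivable workaround; this is illustrative only, not the cnxk driver's
actual code or its actual fix:

#include <stdint.h>

static inline void
casp_release_sketch(uint64_t *addr, uint64_t v0, uint64_t v1)
{
#if defined(__aarch64__)
	/* Pinning the operands to x0/x1 forces an even base register,
	 * sidestepping the odd-pair allocation seen in the log above. */
	register uint64_t x0 __asm__("x0") = v0;
	register uint64_t x1 __asm__("x1") = v1;

	__asm__ volatile(
		".cpu generic+lse\n"
		"caspl %[r0], %[r1], %[r0], %[r1], [%[dst]]\n"
		: [r0] "+r"(x0), [r1] "+r"(x1)
		: [dst] "r"(addr)
		: "memory");
#else
	(void)addr; (void)v0; (void)v1;
#endif
}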
On Tue, Jul 20, 2021 at 1:43 PM Pavan Nikhilesh Bhagavatula
<pbhagavatula@marvell.com> wrote:
> >This patch triggers a build issue when cross-compiling for arm64 on my
> >system with an 8.3 toolchain from Linaro.
> >I ended up upgrading my toolchain (which solved the issue), but some
> >users might hit this, so posting for info:
>
> This is a known compiler bug
> https://bugs.dpdk.org/show_bug.cgi?id=697
I see this bz is still open, what is the next step, if any?
--
David Marchand