* [dpdk-dev] [PATCH v3 02/13] net/cnxk: enable ptp processing in vector Rx
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
@ 2021-06-20 20:28 ` pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 03/13] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
` (13 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:28 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in the vector Rx burst function: process mbufs on the
vector path and switch to scalar only when extracting the timestamp.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
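For reference, a scalar-equivalent sketch of the per-packet work the NEON
block added to cn10k_rx.h/cn9k_rx.h below performs. The helper name is
hypothetical, it assumes the usual driver headers (rte_mbuf.h,
rte_byteorder.h, cnxk_ethdev.h) and that the HW-prepended 8-byte timestamp
sits at the start of the packet data; the real path keeps everything in
vector registers and only does scalar loads for the timestamps.

static inline void
rx_ptp_one(struct rte_mbuf *m, struct cnxk_timesync_info *tstamp,
	   const uint64_t ptp_olflags)
{
	/* Read the big-endian timestamp inserted in front of the frame. */
	uint64_t ts = rte_be_to_cpu_64(*rte_pktmbuf_mtod(m, uint64_t *));

	/* Strip the timestamp bytes from the reported lengths. */
	m->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
	m->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;

	/* The timestamp dynfield is filled for every packet. */
	*cnxk_nix_timestamp_dynfield(m, tstamp) = ts;

	/* Only PTP frames get the IEEE1588 ol_flags and update the
	 * queue-level timestamp used by the timesync read API.
	 */
	if (m->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC) {
		m->ol_flags |= ptp_olflags;
		tstamp->rx_tstamp = ts;
		tstamp->rx_ready = 1;
	}
}

In the vector path the res nibble built from the ptype compare plays the
role of the if() above, and 31 - __builtin_clz(res) picks the
highest-numbered matching packet so rx_tstamp ends up holding the latest
timestamp of the burst.
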
drivers/net/cnxk/cn10k_ethdev.c | 1 -
drivers/net/cnxk/cn10k_rx.c | 5 +-
drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_rx_vec.c | 3 -
drivers/net/cnxk/cn9k_ethdev.c | 1 -
drivers/net/cnxk/cn9k_rx.c | 5 +-
drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_rx_vec.c | 3 -
drivers/net/cnxk/cnxk_ethdev.h | 19 ++---
9 files changed, 232 insertions(+), 53 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index b079edbd35..7caec6cf14 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 3a9fd71309..69e767ac3d 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5926ff7f46..d9572b19e7 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from be to cpu byteorder. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert from vector to scalar mask */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 65ffa97841..93528a44f9 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -11,9 +11,6 @@
struct rte_mbuf **rx_pkts, \
uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 107a540915..cb302b75d8 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index d293d4eac3..7d9f1bd61f 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 5ae9e8195c..beb52f39d5 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from be to cpu byteorder. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert from vector to scalar mask */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c
index e61c2225c6..ef5f771ef7 100644
--- a/drivers/net/cnxk/cn9k_rx_vec.c
+++ b/drivers/net/cnxk/cn9k_rx_vec.c
@@ -9,9 +9,6 @@
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 67b1f42531..4eead03905 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -136,13 +136,12 @@ struct cnxk_eth_qconf {
};
struct cnxk_timesync_info {
+ uint8_t rx_ready;
+ uint64_t rx_tstamp;
uint64_t rx_tstamp_dynflag;
+ int tstamp_dynfield_offset;
rte_iova_t tx_tstamp_iova;
uint64_t *tx_tstamp;
- uint64_t rx_tstamp;
- int tstamp_dynfield_offset;
- uint8_t tx_ready;
- uint8_t rx_ready;
} __plt_cache_aligned;
struct cnxk_eth_dev {
@@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf,
static __rte_always_inline void
cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf,
- struct cnxk_timesync_info *tstamp, bool ts_enable,
+ struct cnxk_timesync_info *tstamp,
+ const uint8_t ts_enable, const uint8_t mseg_enable,
uint64_t *tstamp_ptr)
{
- if (ts_enable &&
- (mbuf->data_off ==
- RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) {
- mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ if (ts_enable) {
+ if (!mseg_enable) {
+ mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ }
/* Reading the rx timestamp inserted by CGX, viz at
* starting of the packet data.
--
2.17.1
* [dpdk-dev] [PATCH v3 03/13] net/cnxk: enable VLAN processing in vector Tx
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 02/13] net/cnxk: enable ptp processing in vector Rx pbhagavatula
@ 2021-06-20 20:28 ` pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 04/13] net/cnxk: enable ptp " pbhagavatula
` (12 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:28 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable VLAN offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
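For reference, a scalar sketch of the NIX send-ext word 1 fields the NEON
block below assembles per packet. The field positions mirror the shifts and
enable bits used in the vector code, but the helper name is hypothetical
and this is not the code path the driver actually takes.

static inline uint64_t
tx_ext_w1_vlan(const struct rte_mbuf *m, uint64_t w1)
{
	/* Outer TCI goes to VLAN0, inner TCI to VLAN1. */
	w1 |= (uint64_t)m->vlan_tci_outer << 8;
	w1 |= (uint64_t)m->vlan_tci << 32;

	/* Insert-enable bits: bit 48 for VLAN0 (QinQ), bit 49 for VLAN1. */
	if (m->ol_flags & PKT_TX_QINQ)
		w1 |= BIT_ULL(48);
	if (m->ol_flags & PKT_TX_VLAN)
		w1 |= BIT_ULL(49);

	return w1;
}

The vector version does the same with vtstq_u64() on the ol_flags and a
pair of OR masks so all four packets are handled without branches.
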
drivers/net/cnxk/cn10k_tx.c | 3 +-
drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_tx_vec.c | 3 +-
drivers/net/cnxk/cn9k_tx.c | 3 +-
drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 3 +-
6 files changed, 227 insertions(+), 38 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 18694dc704..05bc163a40 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8b1446f25c..1e16978584 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
- RTE_SET_USED(flags);
- /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
- return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
+ return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
+ << ROC_LMT_LINES_PER_CORE_LOG2;
+}
+
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line(const uint16_t flags)
+{
+ return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
}
static __rte_always_inline uint64_t
@@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
- const uint64_t dw_m1 = 0x7;
+ const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
uint64_t data;
- RTE_SET_USED(flags);
/* This will be moved to addr area */
data = dw_m1;
/* 15 vector sizes for single seg */
@@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
sgdesc23_w0 = sgdesc01_w0;
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Get LMT base address and LMT ID as lcore id */
ROC_LMT_BASE_ID_GET(laddr, lmt_id);
left = pkts;
@@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QnQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- /* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
- lnum += 1;
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ lnum += 1;
+ } else {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
+ lnum += 1;
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 7453f3bc98..beb5c649bb 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index b802606075..4b43cdaff9 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 1899d6670f..d5715bb52d 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -552,10 +552,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
senddesc01_w1 = vdupq_n_u64(0);
senddesc23_w1 = senddesc01_w1;
- sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
- sgdesc23_w0 = sgdesc01_w0;
+
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
+ sgdesc23_w0 = sgdesc01_w0;
+ } else {
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sgdesc23_w0 = sgdesc01_w0;
+ }
for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
@@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QnQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd1[0]);
- vst1q_u64(lmt_addr + 4, cmd0[1]);
- vst1q_u64(lmt_addr + 6, cmd1[1]);
- vst1q_u64(lmt_addr + 8, cmd0[2]);
- vst1q_u64(lmt_addr + 10, cmd1[2]);
- vst1q_u64(lmt_addr + 12, cmd0[3]);
- vst1q_u64(lmt_addr + 14, cmd1[3]);
- lmt_status = roc_lmt_submit_ldeor(io_addr);
- } while (lmt_status == 0);
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* With ext header in the command we can no longer send
+ * all 4 packets together since LMTLINE is 128bytes.
+ * Split and Tx twice.
+ */
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ do {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ } else {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index a6e7c9e542..5842facb58 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
--
2.17.1
* [dpdk-dev] [PATCH v3 04/13] net/cnxk: enable ptp processing in vector Tx
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 02/13] net/cnxk: enable ptp processing in vector Rx pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 03/13] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
@ 2021-06-20 20:28 ` pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 05/13] net/cnxk: enable TSO " pbhagavatula
` (11 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:28 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in the vector Tx burst function. Since a single LMT
line can no longer hold a burst of four packets, split the LMT line in
two and transmit twice.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
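For reference, a scalar sketch of the per-packet SEND_MEM adjustment the
NEON block below performs. The defaults loaded from txq->cmd[] use the
SETTSTMP algorithm and point at the Tx timestamp word; the helper name
here is hypothetical and only illustrates the intent.

static inline void
tx_send_mem_fixup(uint64_t *sm_w0, uint64_t *sm_w1, uint64_t ol_flags)
{
	if (!(ol_flags & PKT_TX_IEEE1588_TMST)) {
		/* Flip the algorithm from SETTSTMP to SUB ... */
		*sm_w0 |= BIT_ULL(59);
		/* ... and retarget the write to a scratch 8-byte slot so
		 * the captured timestamp is not overwritten.
		 */
		*sm_w1 += 8;
	}
}

The vector code builds an inverted vtstq_u64() mask from
PKT_TX_IEEE1588_TMST so that packets which do request a timestamp keep the
default command untouched.
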
drivers/net/cnxk/cn10k_tx.c | 4 +-
drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++-----
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 4 +-
drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 192 insertions(+), 40 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 05bc163a40..c4c3e65704 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 1e16978584..8af6799ff6 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
- return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
+ return (flags & NIX_TX_NEED_EXT_HDR) ?
+ ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
+ 8;
}
static __rte_always_inline uint64_t
@@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = sendext01_w0;
sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
sendext23_w1 = sendext01_w1;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
+ sendmem23_w1 = sendmem01_w1;
+ }
}
/* Get LMT base address and LMT ID as lcore id */
@@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB*/
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
- lnum += 1;
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
+ } else {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ }
lnum += 1;
} else {
/* Store the prepared send desc to LMT lines */
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index beb5c649bb..0b4a4c7bae 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 4b43cdaff9..c32681ed44 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index d5715bb52d..cb574a1c1d 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
sgdesc23_w0 = sgdesc01_w0;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
+ sendmem23_w1 = sendmem01_w1;
+ }
} else {
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
sgdesc23_w0 = sgdesc01_w0;
@@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB*/
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* Split and Tx twice.
*/
do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd2[0]);
- vst1q_u64(lmt_addr + 4, cmd1[0]);
- vst1q_u64(lmt_addr + 6, cmd0[1]);
- vst1q_u64(lmt_addr + 8, cmd2[1]);
- vst1q_u64(lmt_addr + 10, cmd1[1]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd3[0]);
+ vst1q_u64(lmt_addr + 8, cmd0[1]);
+ vst1q_u64(lmt_addr + 10, cmd2[1]);
+ vst1q_u64(lmt_addr + 12, cmd1[1]);
+ vst1q_u64(lmt_addr + 14, cmd3[1]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
do {
- vst1q_u64(lmt_addr, cmd0[2]);
- vst1q_u64(lmt_addr + 2, cmd2[2]);
- vst1q_u64(lmt_addr + 4, cmd1[2]);
- vst1q_u64(lmt_addr + 6, cmd0[3]);
- vst1q_u64(lmt_addr + 8, cmd2[3]);
- vst1q_u64(lmt_addr + 10, cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd3[2]);
+ vst1q_u64(lmt_addr + 8, cmd0[3]);
+ vst1q_u64(lmt_addr + 10, cmd2[3]);
+ vst1q_u64(lmt_addr + 12, cmd1[3]);
+ vst1q_u64(lmt_addr + 14, cmd3[3]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
} else {
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 5842facb58..9ade66db2b 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1
* [dpdk-dev] [PATCH v3 05/13] net/cnxk: enable TSO processing in vector Tx
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (2 preceding siblings ...)
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 04/13] net/cnxk: enable ptp " pbhagavatula
@ 2021-06-20 20:28 ` pbhagavatula
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 06/13] net/cnxk: add multi seg Tx vector routine pbhagavatula
` (10 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:28 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable TSO offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
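One non-obvious line in the cn10k/cn9k *_nix_prepare_tso() helpers added
below is the branchless selection of the LSO start byte. A plain branching
equivalent of that computation, shown only for clarity:

	/* mask = -(!w1->il3type) is all-ones when there is no inner L3
	 * header (non-tunnel packet) and all-zeros otherwise, so the
	 * masked sum picks the outer or inner L4 pointer without a branch.
	 */
	uint16_t lso_sb = (w1->il3type ? w1->il4ptr : w1->ol4ptr) + m->l4_len;
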
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 2 +-
drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 199 insertions(+), 6 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index c4c3e65704..d06879163f 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8af6799ff6..26797581e7 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ const uint64_t flags, const uint64_t lso_tun_fmt)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+ uint8_t shift = is_udp_tun ? 32 : 0;
+
+ shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
+ shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+
+ w0->lso_format = (lso_tun_fmt >> shift);
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
senddesc23_w0 = senddesc01_w0;
@@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ const uint64_t lso_fmt = txq->lso_tun_fmt;
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn10k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 0b4a4c7bae..34e3737501 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index c32681ed44..735e21cc60 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index cb574a1c1d..dca732a9fa 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ uint64_t flags)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+ w0->lso_format += is_udp_tun ? 2 : 6;
+
+ w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
+
/* Lets commit any changes in the packet here as no further changes
* to the packet will be done unless no fast free is enabled.
*/
@@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn9k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 9ade66db2b..56a3e2514a 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1
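The cn9k_nix_prepare_tso() helper added above picks the LSO start byte with a branchless select: mask = -(!w1->il3type) is all ones when no inner L3 header is present and zero otherwise, so either the outer or the inner L4 pointer is chosen without a conditional before m->l4_len is added. A minimal standalone sketch of that idiom, with hypothetical offsets and names (not driver code):

#include <stdint.h>
#include <stdio.h>

/* Branchless select: returns 'outer' when il3type == 0, else 'inner'. */
static uint16_t
select_l4_ptr(uint16_t il3type, uint16_t outer, uint16_t inner)
{
	uint64_t mask = -(uint64_t)(!il3type); /* all ones iff il3type == 0 */

	return (mask & outer) + (~mask & inner);
}

int main(void)
{
	/* Hypothetical header offsets. */
	printf("%u\n", select_l4_ptr(0, 34, 0));  /* no inner L3 -> 34 */
	printf("%u\n", select_l4_ptr(1, 34, 84)); /* tunnelled    -> 84 */
	return 0;
}

The selected start byte then feeds the SEND_EXT_W0 LSO fields (lso_sb, lso_mps, lso_format) set in the same helper.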
* [dpdk-dev] [PATCH v3 06/13] net/cnxk: add multi seg Tx vector routine
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (3 preceding siblings ...)
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 05/13] net/cnxk: enable TSO " pbhagavatula
@ 2021-06-20 20:28 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 07/13] event/cnxk: add Rx adapter support pbhagavatula
` (9 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:28 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add multi segment Tx vector routine.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 20 +-
drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++--
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/cn9k_tx.c | 20 +-
drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++-
drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/meson.build | 6 +-
7 files changed, 709 insertions(+), 45 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c
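The cn10k_tx.c and cn9k_tx.c hunks below add a nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] table and select scalar or vector multi-segment burst functions at configure time; the entries come from the NIX_TX_FASTPATH_MODES template expanded in the new *_tx_vec_mseg.c files. A small sketch of this X-macro/function-table pattern, reduced to two invented flags (names and flags here are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* Illustrative offload bits, not the driver's. */
#define F_MSEG 0
#define F_VLAN 1

#define FASTPATH_MODES \
	M(no_offload, 0, 0) \
	M(vlan,       0, 1) \
	M(mseg,       1, 0) \
	M(mseg_vlan,  1, 1)

/* First expansion: one specialised burst function per flag combination. */
#define M(name, mseg, vlan) \
	static uint16_t xmit_##name(void *pkts, uint16_t n) \
	{ (void)pkts; printf("xmit_" #name "\n"); return n; }
FASTPATH_MODES
#undef M

typedef uint16_t (*tx_burst_t)(void *pkts, uint16_t n);

/* Second expansion: a table indexed by the individual flag bits. */
static const tx_burst_t tx_burst[2][2] = {
#define M(name, mseg, vlan) [mseg][vlan] = xmit_##name,
	FASTPATH_MODES
#undef M
};

int main(void)
{
	uint64_t offloads = 1ULL << F_MSEG; /* multi-seg only */

	tx_burst[!!(offloads & (1ULL << F_MSEG))]
		[!!(offloads & (1ULL << F_VLAN))](NULL, 4);
	return 0;
}

The driver does the same with six offload flag bits, so each combination gets a dedicated, constant-folded burst function.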
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index d06879163f..1f30bab59a 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 26797581e7..532b53b319 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -42,6 +42,13 @@
} \
} while (0)
+/* Encoded number of segments to number of dwords macro, each value of nb_segs
+ * is encoded as 4bits.
+ */
+#define NIX_SEGDW_MAGIC 0x76654432210ULL
+
+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
+
#define LMT_OFF(lmt_addr, lmt_num, offset) \
(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
return data;
}
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
+{
+ return ((flags & NIX_TX_NEED_EXT_HDR) ?
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
+ 4);
+}
+
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+}
+
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint8_t segdw,
+ const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return;
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ sh.sizem1 = segdw - 1;
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline uint8_t
+cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, uint64x2_t *cmd2,
+ uint64x2_t *cmd3, uint8_t *segdw,
+ uint64_t *lmt_addr, __uint128_t *data128,
+ uint8_t *shift, const uint16_t flags)
+{
+ uint8_t j, off, lmt_used;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+
+ *data128 |= ((__uint128_t)7) << *shift;
+ shift += 3;
+
+ return 1;
+ }
+ }
+
+ lmt_used = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ /* TSTAMP takes 4 each, no segs. */
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ vst1q_u64(lmt_addr + 6, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
+ vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* EXT header take 3 each, space for 2 segs.*/
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 3;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 12 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ off = segdw[j] - 2;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 8 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
+ << *shift;
+ *shift += 3;
+ j += 2;
+ } else {
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 4;
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
+ *shift += 3;
+ j++;
+ }
+ lmt_used++;
+ lmt_addr += 16;
+ }
+
+ return lmt_used;
+}
+
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
- uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
+ uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
@@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sendmem01_w0, sendmem23_w0;
uint64x2_t sendmem01_w1, sendmem23_w1;
+ uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t ltypes01, ltypes23;
uint64x2_t xtmp128, ytmp128;
uint64x2_t xmask01, xmask23;
- uint8_t lnum;
+ uint8_t lnum, shift;
+ union wdata {
+ __uint128_t data128;
+ uint64_t data[2];
+ } wd;
NIX_XMIT_FC_OR_RETURN(txq, pkts);
@@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
cn10k_nix_pkts_per_vec_brst(flags) :
left;
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ wd.data128 = 0;
+ shift = 16;
+ }
lnum = 0;
+
for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ struct rte_mbuf *m = tx_pkts[j];
+ uint8_t j;
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ /* Get dwords based on nb_segs. */
+ segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+ /* Add dwords based on offloads. */
+ segdw[j] += 1 + /* SEND HDR */
+ !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ /* Check if there are enough LMTLINES for this loop */
+ if (lnum + 4 > 32) {
+ uint8_t ldwords_con = 0, lneeded = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ ldwords_con += segdw[j];
+ if (ldwords_con > 8) {
+ lneeded += 1;
+ ldwords_con = segdw[j];
+ }
+ }
+ lneeded += 1;
+ if (lnum + lneeded > 32) {
+ burst = i;
+ break;
+ }
+ }
+ }
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
senddesc01_w0 =
vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
@@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
(void **)&mbuf3, 1, 0);
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint8_t j;
+
+ segdw[4] = 8;
+ j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
+ cmd2, cmd3, segdw,
+ (uint64_t *)
+ LMT_OFF(laddr, lnum,
+ 0),
+ &wd.data128, &shift,
+ flags);
+ lnum += j;
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
@@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] >>= 16;
+
/* Trigger LMTST */
if (lnum > 16) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= (15ULL << 12);
- data |= (uint64_t)lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= (15ULL << 12);
+ wd.data[0] |= (uint64_t)lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 17)) << 12;
- data |= (uint64_t)(lmt_id + 16);
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[1] & 0x7) << 4;
+ wd.data[1] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[1] <<= 16;
+
+ wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
+ wd.data[1] |= (uint64_t)(lmt_id + 16);
/* STEOR1 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[1], pa);
} else if (lnum) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 1)) << 12;
- data |= lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
+ wd.data[0] |= lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
}
left -= burst;
@@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (left)
goto again;
- if (unlikely(scalar))
- pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
- flags);
+ if (unlikely(scalar)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ scalar, cmd, flags);
+ else
+ pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
+ cmd, flags);
+ }
return pkts;
}
@@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
- void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
NIX_TX_FASTPATH_MODES
#undef T
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
new file mode 100644
index 0000000000..1fad81dbad
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn10k_nix_xmit_pkts_vector( \
+ tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 735e21cc60..763f9a14fd 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index dca732a9fa..ed65cd351f 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ uint64_t segdw;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+ segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
+
+ segdw += 2;
+ /* Roundup extra dwords to multiple of 2 */
+ segdw = (segdw >> 1) + (segdw & 0x1);
+ /* Default dwords */
+ segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ sh->sizem1 = segdw - 1;
+
+ return segdw;
+}
+
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+ uint8_t ret;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ return ret;
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline void
+cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
+ uint64x2_t *cmd2, uint64x2_t *cmd3,
+ uint8_t *segdw,
+ uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
+ uint64_t *lmt_addr, rte_iova_t io_addr,
+ const uint32_t flags)
+{
+ uint64_t lmt_status;
+ uint8_t j, off;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ return;
+ }
+ }
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+again0:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 14 + off,
+ slist[j + 1], segdw[j + 1] - 4);
+ off += ((segdw[j + 1] - 4) << 1);
+ vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 12 + off,
+ slist[j + 1], segdw[j + 1] - 3);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 8 + off,
+ slist[j + 1], segdw[j + 1] - 2);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again0;
+ j += 2;
+ } else {
+again1:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again1;
+ j += 1;
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* cnxk_nix_prefree_seg are written before LMTST.
*/
rte_io_wmb();
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint64_t seg_list[NIX_DESCS_PER_LOOP]
+ [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+ uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
+
+ /* Build mseg list for each packet individually. */
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
+ seg_list[j], &cmd0[j],
+ &cmd1[j], flags);
+ segdw[4] = 8;
+
+ /* Commit all changes to mbuf before LMTST. */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ rte_io_wmb();
+
+ cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
+ segdw, seg_list,
+ lmt_addr, io_addr,
+ flags);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* With ext header in the command we can no longer send
* all 4 packets together since LMTLINE is 128bytes.
* Split and Tx twice.
@@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
- if (unlikely(pkts_left))
- pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
- flags);
+ if (unlikely(pkts_left)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ pkts_left, cmd, flags);
+ else
+ pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
+ cmd, flags);
+ }
return pkts;
}
@@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
new file mode 100644
index 0000000000..0256efd45a
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | \
+ NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index aa8c7253fb..361f7ce849 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
- 'cn9k_tx_vec.c')
+ 'cn9k_tx_vec.c',
+ 'cn9k_tx_vec_mseg.c')
# CN10K
sources += files('cn10k_ethdev.c',
'cn10k_rte_flow.c',
@@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
- 'cn10k_tx_vec.c')
+ 'cn10k_tx_vec.c',
+ 'cn10k_tx_vec_mseg.c')
deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']
deps += ['common_cnxk', 'mempool_cnxk']
--
2.17.1
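NIX_SEGDW_MAGIC, added in cn10k_tx.h above, packs a small lookup table into one 64-bit literal: nibble x holds the segment-dword count the driver budgets for x mbuf segments, seemingly one SG header word per three segments plus one iova word per segment, rounded up to pairs of 8-byte words. A standalone sketch that simply evaluates the macro copied from the patch:

#include <stdint.h>
#include <stdio.h>

/* Copied from the cn10k_tx.h hunk above. */
#define NIX_SEGDW_MAGIC		0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int main(void)
{
	unsigned int segs;

	/* Prints 1->1, 2->2, 3->2, 4->3, ... 10->7. */
	for (segs = 1; segs <= 10; segs++)
		printf("nb_segs=%2u -> %llu dwords\n", segs,
		       (unsigned long long)NIX_NB_SEGS_TO_SEGDW(segs));
	return 0;
}

This keeps the per-packet dword calculation in cn10k_nix_xmit_pkts_vector() down to a shift and a mask.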
* [dpdk-dev] [PATCH v3 07/13] event/cnxk: add Rx adapter support
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (4 preceding siblings ...)
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 06/13] net/cnxk: add multi seg Tx vector routine pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 08/13] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (8 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella,
Neil Horman
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit within a 64B cacheline.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 28 ++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 540 insertions(+), 47 deletions(-)
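On the application side, the adapter added by this patch is driven through the standard rte_event_eth_rx_adapter API; since the PMD reports RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT, no service core is involved and packets arrive as events. A rough usage sketch, assuming the event device and ethernet port are already configured; the ids and the setup_rx_adapter() helper are illustrative, and error handling is reduced to early returns:

#include <errno.h>
#include <stdint.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

/* Illustrative ids, not mandated by the driver. */
#define RX_ADAPTER_ID 0
#define EVENT_DEV_ID  0
#define ETH_PORT_ID   0
#define EV_QUEUE_ID   0

static int
setup_rx_adapter(void)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(EVENT_DEV_ID, ETH_PORT_ID, &caps);
	if (rc)
		return rc;
	/* This sketch assumes the cnxk internal-port path. */
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	rc = rte_event_port_default_conf_get(EVENT_DEV_ID, 0, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_create(RX_ADAPTER_ID, EVENT_DEV_ID, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = EV_QUEUE_ID;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;

	/* rx_queue_id = -1 connects every Rx queue of the port. */
	rc = rte_event_eth_rx_adapter_queue_add(RX_ADAPTER_ID, ETH_PORT_ID, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(RX_ADAPTER_ID);
}

Whether all Rx queues (rx_queue_id = -1) or a single queue is added, the queue_add path later in this patch reconfigures the NIX RQ for SSO delivery and adjusts NPA back pressure, which the force_rx_bp=1 devarg can force when one mempool is shared across ports.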
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800cc..b7e82c1273 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev, exposed through the
+ event eth Rx adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when the same mempool is used across the ethernet
+ devices connected to the event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device causes back pressure to be asserted only on the first
+ethernet device.
+Back pressure is therefore automatically disabled when the same mempool is
+used for all the ethernet devices connected to the event device; applications
+can override this with the `force_rx_bp=1` device argument.
+Using a unique mempool per ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a56..3892c8017a 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
+* **Added support for Marvell CN10K and CN9K event Rx adapter.**
+
+ * Added Rx adapter support for event/cnxk when the ethernet device requested is
+ net/cnxk.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb69027956..76613fe84e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f8..f17eba4169 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df442..9dc0c88a6f 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8a5c839e57..cb1ce4b6fc 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -29,6 +29,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76c..2060c8fe84 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c64..5dbae275ba 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd5..c7250bf9e7 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417eab..072800c243 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb6..f5a4401465 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a79..cfd7fb971c 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62a..b65d725f55 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c14..24bfd985e7 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp,
+ RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, i, port, &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i);
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a9..eda562f5b5 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v3 08/13] event/cnxk: add Rx adapter fastpath ops
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (5 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 07/13] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 09/13] event/cnxk: add Tx adapter support pbhagavatula
` (7 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
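The dequeue fast path is specialized per Rx offload combination: the R() macro in
NIX_RX_FASTPATH_MODES stamps out one dequeue variant per flag set, and
cn9k/cn10k_sso_fp_fns_set() selects the matching variant by indexing a lookup
table with the individual offload bits. The standalone sketch below only
illustrates this dispatch pattern; every name in it (demo_*, DEMO_F_*) is
hypothetical and not part of the driver.

/* Illustrative sketch of flag-indexed function selection; names are
 * hypothetical and only mirror the [2][2]...[2] tables built from R().
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_F_RSS  (1 << 0)
#define DEMO_F_CSUM (1 << 1)

typedef uint16_t (*demo_deq_t)(void *port);

static uint16_t demo_deq(void *port)          { (void)port; return 0; }
static uint16_t demo_deq_rss(void *port)      { (void)port; return 1; }
static uint16_t demo_deq_csum(void *port)     { (void)port; return 2; }
static uint16_t demo_deq_csum_rss(void *port) { (void)port; return 3; }

/* One entry per offload combination, indexed by !!(flag) of each bit. */
static demo_deq_t
demo_select_deq(uint32_t offloads)
{
	static const demo_deq_t tbl[2][2] = {
		[0][0] = demo_deq,      [0][1] = demo_deq_rss,
		[1][0] = demo_deq_csum, [1][1] = demo_deq_csum_rss,
	};

	return tbl[!!(offloads & DEMO_F_CSUM)][!!(offloads & DEMO_F_RSS)];
}

int
main(void)
{
	demo_deq_t deq = demo_select_deq(DEMO_F_RSS | DEMO_F_CSUM);

	printf("variant id: %u\n", (unsigned int)deq(NULL));
	return 0;
}

The variant is picked once at device start, so the per-event dequeue path stays
free of offload-flag branches.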
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe84..ba7d95fff7 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275ba..c71aa37327 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e7..b724083caa 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 0000000000..36ec454ccc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 0000000000..29ecc551cf
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 0000000000..c8524a27bd
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c243..e386cb784a 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98dd..538bc4b0b3 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a4401465..c01c00e1da 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 0000000000..51ccaf4ec4
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 0000000000..4e2801459b
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 0000000000..9713d1ef00
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 0000000000..709fa2d9ef
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 0000000000..d50e1cf83f
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 0000000000..a0508fdf0d
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f55..9d5d2d0339 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b5..c5c1c0ee8e 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v3 09/13] event/cnxk: add Tx adapter support
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (6 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 08/13] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 10/13] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (6 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
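For context, a minimal control-path sketch of how an application is expected
to consume this feature; only the generic rte_event_eth_tx_adapter library API
is assumed, and the helper name, ids and error handling below are illustrative,
not part of this patch:

#include <errno.h>

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

/* Hypothetical helper: create a Tx adapter on event device 'evdev_id'
 * and bind every Tx queue of 'eth_port'. With the INTERNAL_PORT
 * capability advertised by this patch, the PMD transmits directly from
 * the enqueue path and no service core is needed.
 */
static int
setup_tx_adapter(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port,
                 struct rte_event_port_conf *port_conf)
{
        uint32_t caps = 0;
        int rc;

        rc = rte_event_eth_tx_adapter_caps_get(evdev_id, eth_port, &caps);
        if (rc)
                return rc;
        if (!(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
                return -ENOTSUP;

        rc = rte_event_eth_tx_adapter_create(adptr_id, evdev_id, port_conf);
        if (rc)
                return rc;

        /* tx_queue_id of -1 binds all Tx queues of the port, taking the
         * tx_queue_id < 0 path in cnxk_sso_tx_adapter_queue_add().
         */
        rc = rte_event_eth_tx_adapter_queue_add(adptr_id, eth_port, -1);
        if (rc)
                return rc;

        return rte_event_eth_tx_adapter_start(adptr_id);
}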
---
doc/guides/eventdevs/cnxk.rst | 4 +-
doc/guides/rel_notes/release_21_08.rst | 6 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 21 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++
6 files changed, 339 insertions(+), 6 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c1273..6fdccc2ab4 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
- HW managed packets enqueued from ethdev to eventdev exposed through event eth
RX adapter.
- N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017a..80ff93269c 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
- * Added Rx adapter support for event/cnxk when the ethernet device requested is
- net/cnxk.
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff7..8a9b04a3db 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784a..bdc5632235 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d0339..458fdc8d92 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -86,9 +87,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +119,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +147,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 24bfd985e7..548d7b81ce 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -5,6 +5,8 @@
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
+#define CNXK_SSO_SQB_LIMIT (0x180)
+
void
cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
uint32_t event_type)
@@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ uint16_t sqb_limit;
+
+ sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs);
+ return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit);
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ txq = eth_dev->data->tx_queues[i];
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i, txq);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+ }
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i,
+ NULL);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v3 10/13] event/cnxk: add Tx adapter fastpath ops
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (7 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 09/13] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 11/13] event/cnxk: add Rx adapter vector support pbhagavatula
` (5 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
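On the datapath side, the template functions added here back
rte_event_eth_tx_adapter_enqueue(). A minimal worker sketch, assuming every
dequeued event carries an mbuf and that a single Tx queue per port is used
(both are illustrative assumptions):

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_mbuf.h>

/* Hypothetical worker fragment: events dequeued on 'ev_port' are handed
 * straight back to the Tx adapter; the library resolves the call to the
 * txa_enqueue handler installed by cn9k/cn10k_sso_fp_fns_set() in this
 * patch.
 */
static void
worker_tx_loop(uint8_t evdev_id, uint8_t ev_port)
{
        struct rte_event ev;

        while (rte_event_dequeue_burst(evdev_id, ev_port, &ev, 1, 0)) {
                /* Pick the Tx queue this mbuf should leave on; queue 0
                 * is assumed here for simplicity.
                 */
                rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
                while (rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port,
                                                        &ev, 1, 0) != 1)
                        ;
        }
}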
---
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/meson.build | 6 ++
11 files changed, 417 insertions(+)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3db..e462f770c5 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083caa..3c90c85009 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 0000000000..f9968ac0d0
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..a24fc42e5a
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index bdc5632235..af97020f2f 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1da..5aa053c586 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Commit any changes to the packet here: when fast free is set,
+ * no further changes will be made to the mbuf. When fast free is
+ * not set, both cn9k_nix_prepare_mseg() and cn9k_nix_xmit_prepare()
+ * have a barrier after the refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 0000000000..92e2981f02
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 0000000000..dfb574cf95
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 0000000000..3df649c0c8
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..0efe29113e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8e..13e0634e86 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v3 11/13] event/cnxk: add Rx adapter vector support
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (8 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 10/13] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 12/13] event/cnxk: add Rx event vector fastpath pbhagavatula
` (4 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter: add control
path APIs to get vector limits and to configure event vectorization
on a given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
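A minimal control-path sketch of driving this from an application, assuming
the experimental Rx adapter vector API of this release
(rte_event_eth_rx_adapter_vector_limits_get() and
rte_event_eth_rx_adapter_queue_event_vector_config()); the helper name and
chosen sizes are illustrative:

#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_mempool.h>

/* Hypothetical helper: query the vector limits exposed by this patch and
 * enable event vectorization on every Rx queue of 'eth_port'. The queues
 * are assumed to have been added to the Rx adapter with the
 * RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR flag.
 */
static int
enable_rx_vectorization(uint8_t rxa_id, uint8_t evdev_id, uint16_t eth_port,
                        struct rte_mempool *vec_pool)
{
        struct rte_event_eth_rx_adapter_event_vector_config vec_conf;
        struct rte_event_eth_rx_adapter_vector_limits limits;
        int rc;

        rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
                                                        &limits);
        if (rc)
                return rc;

        memset(&vec_conf, 0, sizeof(vec_conf));
        vec_conf.vector_sz = limits.max_sz;
        vec_conf.vector_timeout_ns = limits.min_timeout_ns;
        vec_conf.vector_mp = vec_pool;

        /* rx_queue_id of -1 applies the config to every Rx queue, matching
         * the rx_queue_id < 0 path in cn10k_sso_rx_adapter_vector_config().
         */
        return rte_event_eth_rx_adapter_queue_event_vector_config(
                rxa_id, eth_port, -1, &vec_conf);
}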
---
doc/guides/eventdevs/cnxk.rst | 2 +
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
5 files changed, 135 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab4..0297cd3d5f 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
capability while maintaining receive packet order.
- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev, configurable per Rx queue in the Rx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c5..e85fa4785d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 458fdc8d92..3783e0c95b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -96,6 +96,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 548d7b81ce..c4c4f5a7f4 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead03905..2528b3cdaa 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v3 12/13] event/cnxk: add Rx event vector fastpath
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (9 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 11/13] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 13/13] event/cnxk: add Tx " pbhagavatula
` (3 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Rx event vector fastpath to convert HW-defined metadata into
rte_mbuf and rte_event_vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
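On the consumer side a dequeued event may now carry a vector of mbufs. A
minimal handling sketch follows; freeing the packets stands in for real
processing, and returning the vector container to its pool via
rte_mempool_from_obj() is one possible approach, an assumption rather than
something mandated by this patch:

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Hypothetical event handler: the PMD has already converted the HW
 * metadata of every packet in the vector into an rte_mbuf.
 */
static void
handle_event(struct rte_event *ev)
{
        if (ev->event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
                struct rte_event_vector *vec = ev->vec;
                uint16_t i;

                for (i = 0; i < vec->nb_elem; i++)
                        rte_pktmbuf_free(vec->mbufs[i]);
                /* The vector container itself goes back to its pool. */
                rte_mempool_put(rte_mempool_from_obj(vec), vec);
        } else if (ev->event_type == RTE_EVENT_TYPE_ETHDEV) {
                rte_pktmbuf_free(ev->mbuf);
        }
}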
---
doc/guides/rel_notes/release_21_08.rst | 1 +
drivers/event/cnxk/cn10k_worker.h | 56 +++++++
drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
5 files changed, 179 insertions(+), 85 deletions(-)
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269c..11ccc9bcb5 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
+ * Added support for event vectorization for the Rx adapter.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c85009..7a48a6b17d 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e7..a506a867ca 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f9..166735ad59 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c82..1f44dddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
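
For illustration only (not part of the patch above): with PTP handled in the
vector path an application can stay on the default vector Rx burst and still
read Rx timestamps. The helper names below are hypothetical and the index
passed to the read call is an assumption.

#include <time.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Enable timesync at init; the driver then turns on its Rx timestamp
 * offload and, with this patch, keeps the vector Rx burst function.
 */
static int
app_ptp_init(uint16_t port_id)
{
	return rte_eth_timesync_enable(port_id);
}

/* Per received packet: PTP frames are flagged by the Rx burst and the
 * latched timestamp can be read back from the port.
 */
static int
app_ptp_rx_ts(uint16_t port_id, const struct rte_mbuf *m,
	      struct timespec *ts)
{
	if (!(m->ol_flags & PKT_RX_IEEE1588_TMST))
		return -1;
	return rte_eth_timesync_read_rx_timestamp(port_id, ts, 0);
}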
* [dpdk-dev] [PATCH v3 13/13] event/cnxk: add Tx event vector fastpath
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (10 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 12/13] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-06-20 20:29 ` pbhagavatula
2021-06-27 6:57 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine Jerin Jacob
` (2 subsequent siblings)
14 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-20 20:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add the Tx event vector fastpath and integrate the event vector Tx
routine into the Tx burst path.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
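As a rough illustration only (not part of this patch), an application could
hand a vector of mbufs to the Tx adapter along these lines once
RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR is reported. The function and
variable names below are hypothetical, and the per-mbuf queue assignment
corresponds to the split-Tx path added here.

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

static void
app_tx_event_vector(uint8_t evdev_id, uint8_t ev_port,
		    struct rte_event_vector *vec, uint16_t txq)
{
	struct rte_event ev = {0};
	uint16_t i;

	/* Per-mbuf Tx queue: exercised by the split-Tx path. */
	for (i = 0; i < vec->nb_elem; i++)
		rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);

	ev.event_type = RTE_EVENT_TYPE_VECTOR | RTE_EVENT_TYPE_CPU;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.vec = vec;

	rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, &ev, 1, 0);
}

If the application instead fills the vector's port/queue attributes and sets
attr_valid, the code below takes the single-queue cn10k_nix_xmit_pkts_vector()
path directly instead of splitting per mbuf.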
doc/guides/eventdevs/cnxk.rst | 1 +
doc/guides/rel_notes/release_21_08.rst | 2 +-
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
12 files changed, 167 insertions(+), 53 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5f..53560d3830 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
- Full Rx/Tx offload support defined through ethdev queue configuration.
- HW managed event vectorization on CN10K for packets enqueued from ethdev to
eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb5..9e49cb27d7 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
- * Add support for event vectorization for Rx adapter.
+ * Add support for event vectorization for Rx/Tx adapter.
Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8a..316c6ccd59 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785d..6f37c5bd23 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17d..9cc0992063 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 5aa053c586..ef1e83741a 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
txq->io_addr, segdw);
@@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
txq->io_addr, flags);
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae162..945132b748 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
-#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
- ;
-#endif
-}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59a..0e1276c60b 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 532b53b319..d2a24120ef 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1972,6 +1986,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2028,10 +2045,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2040,13 +2058,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f6754722..4ea4c8a4e5 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e3737501..a0350496ab 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dbad..7f98f79b97 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
* Re: [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (11 preceding siblings ...)
2021-06-20 20:29 ` [dpdk-dev] [PATCH v3 13/13] event/cnxk: add Tx " pbhagavatula
@ 2021-06-27 6:57 ` Jerin Jacob
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
14 siblings, 0 replies; 93+ messages in thread
From: Jerin Jacob @ 2021-06-27 6:57 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, dpdk-dev
On Mon, Jun 21, 2021 at 1:59 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add multi-segment Rx vector routine, form the primary mbufs using
> vector path switch to scalar path when extracting segments.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394
Now that the net/cnxk related changes are merged to
dpdk-next-net-mrvl/for-next-net, could you rebase and send a separate
series based on dpdk-next-net-mrvl/for-next-net for the net/cnxk
related changes?
>
> v3 Changes:
> - Spell check.
>
> drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
> drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
> drivers/net/cnxk/meson.build | 2 ++
> 7 files changed, 157 insertions(+), 44 deletions(-)
> create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
> create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
> index 5c956c06b4..3a9fd71309 100644
> --- a/drivers/net/cnxk/cn10k_rx.c
> +++ b/drivers/net/cnxk/cn10k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 1cc37cbaa0..5926ff7f46 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
> @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enable build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> new file mode 100644
> index 0000000000..04d1e46c82
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
> index 0acedd0a1f..d293d4eac3 100644
> --- a/drivers/net/cnxk/cn9k_rx.c
> +++ b/drivers/net/cnxk/cn9k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
> index 10ef5c6905..5ae9e8195c 100644
> --- a/drivers/net/cnxk/cn9k_rx.h
> +++ b/drivers/net/cnxk/cn9k_rx.h
> @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) +
> @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enable build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> new file mode 100644
> index 0000000000..e46d8a4749
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | \
> + NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index 2071d0dcb2..aa8c7253fb 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
> 'cn9k_rx.c',
> 'cn9k_rx_mseg.c',
> 'cn9k_rx_vec.c',
> + 'cn9k_rx_vec_mseg.c',
> 'cn9k_tx.c',
> 'cn9k_tx_mseg.c',
> 'cn9k_tx_vec.c')
> @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
> 'cn10k_rx.c',
> 'cn10k_rx_mseg.c',
> 'cn10k_rx_vec.c',
> + 'cn10k_rx_vec_mseg.c',
> 'cn10k_tx.c',
> 'cn10k_tx_mseg.c',
> 'cn10k_tx_vec.c')
> --
> 2.17.1
>
* [dpdk-dev] [PATCH v4 1/6] net/cnxk: add multi seg Rx vector routine
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (12 preceding siblings ...)
2021-06-27 6:57 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine Jerin Jacob
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
` (5 more replies)
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
14 siblings, 6 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add a multi-segment Rx vector routine: form the primary mbufs on the
vector path and switch to the scalar path when extracting segments.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v4 Changes:
- Split patches for easier merge.
- Rebase on dpdk-next-net-mrvl.
v3 Changes:
- Spell check.
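
As a rough sketch (not part of the patch, helper name hypothetical), the new
vector mseg burst is selected once the scatter Rx offload is enabled and the
scalar path has not been forced:

#include <string.h>
#include <rte_ethdev.h>

static int
app_enable_scatter(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	/* Scatter Rx makes set_rx_function() pick a *_vec_mseg_* burst
	 * (unless the scalar path or, in this revision, PTP timestamping
	 * forces the scalar mseg burst instead).
	 */
	conf.rxmode.offloads = DEV_RX_OFFLOAD_SCATTER;

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}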
drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
drivers/net/cnxk/meson.build | 2 ++
7 files changed, 157 insertions(+), 44 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b..3a9fd7130 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa..5926ff7f4 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enable build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644
index 000000000..04d1e46c8
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index 0acedd0a1..d293d4eac 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 10ef5c690..5ae9e8195 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enable build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644
index 000000000..e46d8a474
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | \
+ NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index 2071d0dcb..aa8c7253f 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx.c',
'cn9k_rx_mseg.c',
'cn9k_rx_vec.c',
+ 'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx.c',
'cn10k_rx_mseg.c',
'cn10k_rx_vec.c',
+ 'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
'cn10k_tx_vec.c')
--
2.17.1
* [dpdk-dev] [PATCH v4 2/6] net/cnxk: enable ptp processing in vector Rx
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 3/6] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
` (4 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in vector Rx burst function, use vector path
for processing mbufs and finally switch to scalar when extracting
timestamp.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
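For illustration only (not part of the patch), an application can read back
the timestamp this path stores through the standard mbuf dynamic field API;
the helper and variable names below are hypothetical:

#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static int app_ts_off;       /* dynamic field offset */
static uint64_t app_ts_flag; /* Rx timestamp dynamic flag */

static int
app_ts_setup(void)
{
	/* Registers (or looks up) the standard Rx timestamp dynfield
	 * and dynflag, the same field the PMD fills on Rx.
	 */
	return rte_mbuf_dyn_rx_timestamp_register(&app_ts_off, &app_ts_flag);
}

static inline uint64_t
app_ts_read(const struct rte_mbuf *m)
{
	if (!(m->ol_flags & app_ts_flag))
		return 0; /* no timestamp attached to this mbuf */
	return *RTE_MBUF_DYNFIELD(m, app_ts_off, uint64_t *);
}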
drivers/net/cnxk/cn10k_ethdev.c | 1 -
drivers/net/cnxk/cn10k_rx.c | 5 +-
drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_rx_vec.c | 3 -
drivers/net/cnxk/cn9k_ethdev.c | 1 -
drivers/net/cnxk/cn9k_rx.c | 5 +-
drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_rx_vec.c | 3 -
drivers/net/cnxk/cnxk_ethdev.h | 19 ++---
9 files changed, 232 insertions(+), 53 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index b079edbd3..7caec6cf1 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 3a9fd7130..69e767ac3 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5926ff7f4..d9572b19e 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from be to cpu byteorder. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert from vector from scalar mask */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 65ffa9784..93528a44f 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -11,9 +11,6 @@
struct rte_mbuf **rx_pkts, \
uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 994fdb7c3..115e67891 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index d293d4eac..7d9f1bd61 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 5ae9e8195..beb52f39d 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from be to cpu byteorder. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert the mask from vector to scalar. */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c
index e61c2225c..ef5f771ef 100644
--- a/drivers/net/cnxk/cn9k_rx_vec.c
+++ b/drivers/net/cnxk/cn9k_rx_vec.c
@@ -9,9 +9,6 @@
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 67b1f4253..4eead0390 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -136,13 +136,12 @@ struct cnxk_eth_qconf {
};
struct cnxk_timesync_info {
+ uint8_t rx_ready;
+ uint64_t rx_tstamp;
uint64_t rx_tstamp_dynflag;
+ int tstamp_dynfield_offset;
rte_iova_t tx_tstamp_iova;
uint64_t *tx_tstamp;
- uint64_t rx_tstamp;
- int tstamp_dynfield_offset;
- uint8_t tx_ready;
- uint8_t rx_ready;
} __plt_cache_aligned;
struct cnxk_eth_dev {
@@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf,
static __rte_always_inline void
cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf,
- struct cnxk_timesync_info *tstamp, bool ts_enable,
+ struct cnxk_timesync_info *tstamp,
+ const uint8_t ts_enable, const uint8_t mseg_enable,
uint64_t *tstamp_ptr)
{
- if (ts_enable &&
- (mbuf->data_off ==
- RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) {
- mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ if (ts_enable) {
+ if (!mseg_enable) {
+ mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ }
/* Reading the rx timestamp inserted by CGX, viz at
* starting of the packet data.
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
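The vector Rx path in the patch above folds the per-packet PTP ptype matches into a 4-bit scalar mask (res) and then updates the Rx queue with ts[31 - __builtin_clz(res)], i.e. the timestamp of the highest-indexed matching packet in the group of four. A minimal standalone sketch of that selection in plain C (not driver code; the mask and timestamp values below are made up):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Pick the timestamp of the highest-indexed packet whose bit is set in the
 * 4-bit match mask, mirroring ts[31 - __builtin_clz(res)] above. res must be
 * non-zero: __builtin_clz(0) is undefined, which is why the driver only does
 * this under the "if (res)" check.
 */
static uint64_t
latest_tstamp(uint8_t res, const uint64_t ts[4])
{
	/* __builtin_clz() operates on a 32-bit unsigned int, so the index of
	 * the most significant set bit is 31 - clz(res).
	 */
	return ts[31 - __builtin_clz(res)];
}

int
main(void)
{
	const uint64_t ts[4] = {1000, 2000, 3000, 4000};

	/* Packets 0 and 2 matched RTE_PTYPE_L2_ETHER_TIMESYNC -> res = 0x5,
	 * so the latest timestamp is ts[2].
	 */
	printf("latest = %" PRIu64 "\n", latest_tstamp(0x5, ts));
	return 0;
}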
* [dpdk-dev] [PATCH v4 3/6] net/cnxk: enable VLAN processing in vector Tx
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 4/6] net/cnxk: enable ptp " pbhagavatula
` (3 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable VLAN offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 3 +-
drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_tx_vec.c | 3 +-
drivers/net/cnxk/cn9k_tx.c | 3 +-
drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 3 +-
6 files changed, 227 insertions(+), 38 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 18694dc70..05bc163a4 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8b1446f25..1e1697858 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
- RTE_SET_USED(flags);
- /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
- return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
+ return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
+ << ROC_LMT_LINES_PER_CORE_LOG2;
+}
+
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line(const uint16_t flags)
+{
+ return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
}
static __rte_always_inline uint64_t
@@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
- const uint64_t dw_m1 = 0x7;
+ const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
uint64_t data;
- RTE_SET_USED(flags);
/* This will be moved to addr area */
data = dw_m1;
/* 15 vector sizes for single seg */
@@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
sgdesc23_w0 = sgdesc01_w0;
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Get LMT base address and LMT ID as lcore id */
ROC_LMT_BASE_ID_GET(laddr, lmt_id);
left = pkts;
@@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QinQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- /* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
- lnum += 1;
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ lnum += 1;
+ } else {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
+ lnum += 1;
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 7453f3bc9..beb5c649b 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index b80260607..4b43cdaff 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 1899d6670..d5715bb52 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -552,10 +552,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
senddesc01_w1 = vdupq_n_u64(0);
senddesc23_w1 = senddesc01_w1;
- sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
- sgdesc23_w0 = sgdesc01_w0;
+
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
+ sgdesc23_w0 = sgdesc01_w0;
+ } else {
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sgdesc23_w0 = sgdesc01_w0;
+ }
for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
@@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QinQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd1[0]);
- vst1q_u64(lmt_addr + 4, cmd0[1]);
- vst1q_u64(lmt_addr + 6, cmd1[1]);
- vst1q_u64(lmt_addr + 8, cmd0[2]);
- vst1q_u64(lmt_addr + 10, cmd1[2]);
- vst1q_u64(lmt_addr + 12, cmd0[3]);
- vst1q_u64(lmt_addr + 14, cmd1[3]);
- lmt_status = roc_lmt_submit_ldeor(io_addr);
- } while (lmt_status == 0);
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* With ext header in the command we can no longer send
+ * all 4 packets together since LMTLINE is 128 bytes.
+ * Split and Tx twice.
+ */
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ do {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ } else {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index a6e7c9e54..5842facb5 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
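The NEON block in the patch above builds send ext header word 1 by packing the outer TCI (VLAN0) at bit 8 and the inner TCI (VLAN1) at bit 32, then ORs in the insert-enable bits only for packets whose ol_flags carry PKT_TX_QINQ (bit 48, VLAN0) or PKT_TX_VLAN (bit 49, VLAN1). A scalar sketch of the same word layout; the macro names are local to the sketch, and the 12 | 12U << 24 default mirrors the value loaded in the patch (presumably the VLAN insert offsets past the 12-byte Ethernet addresses):

#include <stdint.h>

#define EX_VLAN1_INS_ENA (1ULL << 49) /* enable for VLAN1 (inner TCI) */
#define EX_VLAN0_INS_ENA (1ULL << 48) /* enable for VLAN0 (outer TCI) */

/* Scalar equivalent of what the vector code does per packet: pack both TCIs
 * unconditionally and turn on only the enables requested by ol_flags.
 */
static uint64_t
pack_sendext_w1(uint16_t vlan_tci, uint16_t vlan_tci_outer,
		int has_vlan, int has_qinq)
{
	uint64_t w1 = 12 | 12U << 24; /* default loaded outside the loop */

	w1 |= (uint64_t)vlan_tci_outer << 8; /* VLAN0 TCI, bits 8..23 */
	w1 |= (uint64_t)vlan_tci << 32;      /* VLAN1 TCI, bits 32..47 */
	if (has_vlan)
		w1 |= EX_VLAN1_INS_ENA;
	if (has_qinq)
		w1 |= EX_VLAN0_INS_ENA;
	return w1;
}

The vector version avoids the two branches by testing the ol_flags lanes with vtstq/vandq and ORing the resulting enable masks in, but the word it produces is the same.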
* [dpdk-dev] [PATCH v4 4/6] net/cnxk: enable ptp processing in vector Tx
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 3/6] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 5/6] net/cnxk: enable TSO " pbhagavatula
` (2 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in vector Tx burst function. Since we can no
longer use a single LMT line for a burst of 4, split the LMT line
into two and transmit twice.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 4 +-
drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++-----
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 4 +-
drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 192 insertions(+), 40 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 05bc163a4..c4c3e6570 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 1e1697858..8af6799ff 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
- return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
+ return (flags & NIX_TX_NEED_EXT_HDR) ?
+ ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
+ 8;
}
static __rte_always_inline uint64_t
@@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = sendext01_w0;
sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
sendext23_w1 = sendext01_w1;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
+ sendmem23_w1 = sendmem01_w1;
+ }
}
/* Get LMT base address and LMT ID as lcore id */
@@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB. */
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
- lnum += 1;
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
+ } else {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ }
lnum += 1;
} else {
/* Store the prepared send desc to LMT lines */
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index beb5c649b..0b4a4c7ba 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 4b43cdaff..c32681ed4 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index d5715bb52..cb574a1c1 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
sgdesc23_w0 = sgdesc01_w0;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
+ sendmem23_w1 = sendmem01_w1;
+ }
} else {
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
sgdesc23_w0 = sgdesc01_w0;
@@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB. */
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* Split and Tx twice.
*/
do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd2[0]);
- vst1q_u64(lmt_addr + 4, cmd1[0]);
- vst1q_u64(lmt_addr + 6, cmd0[1]);
- vst1q_u64(lmt_addr + 8, cmd2[1]);
- vst1q_u64(lmt_addr + 10, cmd1[1]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd3[0]);
+ vst1q_u64(lmt_addr + 8, cmd0[1]);
+ vst1q_u64(lmt_addr + 10, cmd2[1]);
+ vst1q_u64(lmt_addr + 12, cmd1[1]);
+ vst1q_u64(lmt_addr + 14, cmd3[1]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
do {
- vst1q_u64(lmt_addr, cmd0[2]);
- vst1q_u64(lmt_addr + 2, cmd2[2]);
- vst1q_u64(lmt_addr + 4, cmd1[2]);
- vst1q_u64(lmt_addr + 6, cmd0[3]);
- vst1q_u64(lmt_addr + 8, cmd2[3]);
- vst1q_u64(lmt_addr + 10, cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd3[2]);
+ vst1q_u64(lmt_addr + 8, cmd0[3]);
+ vst1q_u64(lmt_addr + 10, cmd2[3]);
+ vst1q_u64(lmt_addr + 12, cmd1[3]);
+ vst1q_u64(lmt_addr + 14, cmd3[3]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
} else {
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 5842facb5..9ade66db2 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
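The timestamp handling in the patch above is branch-free: the SEND MEM descriptor defaults to the SETTSTMP algorithm targeting the registered Tx timestamp address, and for mbufs without PKT_TX_IEEE1588_TMST an inverted test mask switches the algorithm to SUB and moves the write address by 8 bytes so the real timestamp slot is not overwritten. A scalar sketch of that per-packet adjustment; ALG_SUB_BIT mirrors BIT_ULL(59) from the patch, while TMST_FLAG is only a stand-in for PKT_TX_IEEE1588_TMST:

#include <stdint.h>

#define ALG_SUB_BIT (1ULL << 59)
#define TMST_FLAG   (1ULL << 35) /* stand-in, not the real PKT_TX_IEEE1588_TMST value */

/* w0/w1 arrive holding the "timestamp requested" defaults; when the flag is
 * absent, redirect the hardware write and change the algorithm so the packet
 * leaves the recorded tx_tstamp alone.
 */
static void
adjust_sendmem(uint64_t ol_flags, uint64_t *w0, uint64_t *w1)
{
	/* All-ones when TMST is NOT requested, all-zeros when it is. */
	uint64_t inv = (ol_flags & TMST_FLAG) ? 0 : ~0ULL;

	*w1 += (inv & 0x8);         /* move the address by 8 bytes */
	*w0 |= (inv & ALG_SUB_BIT); /* SETTSTMP -> SUB */
}

In the patch the defaults are restored at the top of every loop iteration (bit 59 and the low address nibble are cleared) before this adjustment is applied to the next four packets.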
* [dpdk-dev] [PATCH v4 5/6] net/cnxk: enable TSO processing in vector Tx
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
` (2 preceding siblings ...)
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 4/6] net/cnxk: enable ptp " pbhagavatula
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable TSO offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 2 +-
drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 199 insertions(+), 6 deletions(-)
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index c4c3e6570..d06879163 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8af6799ff..26797581e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ const uint64_t flags, const uint64_t lso_tun_fmt)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+ uint8_t shift = is_udp_tun ? 32 : 0;
+
+ shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
+ shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+
+ w0->lso_format = (lso_tun_fmt >> shift);
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
senddesc23_w0 = senddesc01_w0;
@@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ const uint64_t lso_fmt = txq->lso_tun_fmt;
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn10k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 0b4a4c7ba..34e373750 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index c32681ed4..735e21cc6 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index cb574a1c1..dca732a9f 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ uint64_t flags)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+ w0->lso_format += is_udp_tun ? 2 : 6;
+
+ w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
+
/* Lets commit any changes in the packet here as no further changes
* to the packet will be done unless no fast free is enabled.
*/
@@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn9k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 9ade66db2..56a3e2514 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
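One line in cn9k/cn10k_nix_prepare_tso() above is worth spelling out: mask = -(!w1->il3type) is all-ones when there is no inner L3 header and all-zeros otherwise, so the LSO start byte is taken from either the outer or the inner L4 pointer without a branch. A small standalone sketch of that computation:

#include <stdint.h>

/* Branchless select between the outer and inner L4 pointer, plus the L4
 * header length, as used for w0->lso_sb in the patch.
 */
static uint16_t
lso_start_byte(uint16_t il3type, uint16_t ol4ptr, uint16_t il4ptr,
	       uint16_t l4_len)
{
	/* -(uint64_t)(!x) is ~0 when x == 0 (no inner L3) and 0 otherwise. */
	uint64_t mask = -(uint64_t)(!il3type);

	return (uint16_t)((mask & ol4ptr) + (~mask & il4ptr) + l4_len);
}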
* [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
` (3 preceding siblings ...)
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 5/6] net/cnxk: enable TSO " pbhagavatula
@ 2021-06-28 19:41 ` pbhagavatula
2021-06-29 7:25 ` Nithin Dabilpuram
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
5 siblings, 1 reply; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:41 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add multi segment Tx vector routine.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 20 +-
drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++--
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/cn9k_tx.c | 20 +-
drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++-
drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/meson.build | 6 +-
7 files changed, 709 insertions(+), 45 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c
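One helper this patch adds, NIX_SEGDW_MAGIC, is a nibble-packed lookup table: nibble n holds the number of 16-byte units ("dwords" in this code's accounting) needed for the SG area of an n-segment chain, which works out to one SEND_SG_S word per group of up to three segments plus one IOVA word per segment, rounded up to 16-byte units. A quick standalone check of a few entries, using the macros exactly as they appear later in the diff:

#include <assert.h>

#define NIX_SEGDW_MAGIC 0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int
main(void)
{
	/* nb_segs -> SG dwords: 1->1, 2->2, 3->2, 4->3, 5->4, 6->4, 7->5 ... */
	assert(NIX_NB_SEGS_TO_SEGDW(1) == 1);
	assert(NIX_NB_SEGS_TO_SEGDW(3) == 2);
	assert(NIX_NB_SEGS_TO_SEGDW(4) == 3);
	assert(NIX_NB_SEGS_TO_SEGDW(7) == 5);
	return 0;
}

The burst loop then adds one more dword for the SEND HDR and, when needed, for the ext header and timestamp descriptors before checking whether the next four packets still fit in the remaining LMT lines.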
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index d06879163..1f30bab59 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 26797581e..532b53b31 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -42,6 +42,13 @@
} \
} while (0)
+/* Encoded number of segments to number of dwords macro, each value of nb_segs
+ * is encoded as 4 bits.
+ */
+#define NIX_SEGDW_MAGIC 0x76654432210ULL
+
+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
+
#define LMT_OFF(lmt_addr, lmt_num, offset) \
(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
return data;
}
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
+{
+ return ((flags & NIX_TX_NEED_EXT_HDR) ?
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
+ 4);
+}
+
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+}
+
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint8_t segdw,
+ const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return;
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ sh.sizem1 = segdw - 1;
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline uint8_t
+cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, uint64x2_t *cmd2,
+ uint64x2_t *cmd3, uint8_t *segdw,
+ uint64_t *lmt_addr, __uint128_t *data128,
+ uint8_t *shift, const uint16_t flags)
+{
+ uint8_t j, off, lmt_used;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+
+ *data128 |= ((__uint128_t)7) << *shift;
+ shift += 3;
+
+ return 1;
+ }
+ }
+
+ lmt_used = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ /* TSTAMP takes 4 each, no segs. */
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ vst1q_u64(lmt_addr + 6, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
+ vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* EXT header take 3 each, space for 2 segs.*/
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 3;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 12 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ off = segdw[j] - 2;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 8 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
+ << *shift;
+ *shift += 3;
+ j += 2;
+ } else {
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 4;
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
+ *shift += 3;
+ j++;
+ }
+ lmt_used++;
+ lmt_addr += 16;
+ }
+
+ return lmt_used;
+}
+
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
- uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
+ uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
@@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sendmem01_w0, sendmem23_w0;
uint64x2_t sendmem01_w1, sendmem23_w1;
+ uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t ltypes01, ltypes23;
uint64x2_t xtmp128, ytmp128;
uint64x2_t xmask01, xmask23;
- uint8_t lnum;
+ uint8_t lnum, shift;
+ union wdata {
+ __uint128_t data128;
+ uint64_t data[2];
+ } wd;
NIX_XMIT_FC_OR_RETURN(txq, pkts);
@@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
cn10k_nix_pkts_per_vec_brst(flags) :
left;
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ wd.data128 = 0;
+ shift = 16;
+ }
lnum = 0;
+
for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ struct rte_mbuf *m = tx_pkts[j];
+ uint8_t j;
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ /* Get dwords based on nb_segs. */
+ segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+ /* Add dwords based on offloads. */
+ segdw[j] += 1 + /* SEND HDR */
+ !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ /* Check if there are enough LMTLINES for this loop */
+ if (lnum + 4 > 32) {
+ uint8_t ldwords_con = 0, lneeded = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ ldwords_con += segdw[j];
+ if (ldwords_con > 8) {
+ lneeded += 1;
+ ldwords_con = segdw[j];
+ }
+ }
+ lneeded += 1;
+ if (lnum + lneeded > 32) {
+ burst = i;
+ break;
+ }
+ }
+ }
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
senddesc01_w0 =
vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
@@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
(void **)&mbuf3, 1, 0);
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint8_t j;
+
+ segdw[4] = 8;
+ j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
+ cmd2, cmd3, segdw,
+ (uint64_t *)
+ LMT_OFF(laddr, lnum,
+ 0),
+ &wd.data128, &shift,
+ flags);
+ lnum += j;
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
@@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] >>= 16;
+
/* Trigger LMTST */
if (lnum > 16) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= (15ULL << 12);
- data |= (uint64_t)lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= (15ULL << 12);
+ wd.data[0] |= (uint64_t)lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 17)) << 12;
- data |= (uint64_t)(lmt_id + 16);
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[1] & 0x7) << 4;
+ wd.data[1] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[1] <<= 16;
+
+ wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
+ wd.data[1] |= (uint64_t)(lmt_id + 16);
/* STEOR1 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[1], pa);
} else if (lnum) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 1)) << 12;
- data |= lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
+ wd.data[0] |= lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
}
left -= burst;
@@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (left)
goto again;
- if (unlikely(scalar))
- pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
- flags);
+ if (unlikely(scalar)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ scalar, cmd, flags);
+ else
+ pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
+ cmd, flags);
+ }
return pkts;
}
@@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
- void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
NIX_TX_FASTPATH_MODES
#undef T
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
new file mode 100644
index 000000000..1fad81dba
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn10k_nix_xmit_pkts_vector( \
+ tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 735e21cc6..763f9a14f 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index dca732a9f..ed65cd351 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ uint64_t segdw;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+ segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
+
+ segdw += 2;
+ /* Roundup extra dwords to multiple of 2 */
+ segdw = (segdw >> 1) + (segdw & 0x1);
+ /* Default dwords */
+ segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ sh->sizem1 = segdw - 1;
+
+ return segdw;
+}
+
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+ uint8_t ret;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ return ret;
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline void
+cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
+ uint64x2_t *cmd2, uint64x2_t *cmd3,
+ uint8_t *segdw,
+ uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
+ uint64_t *lmt_addr, rte_iova_t io_addr,
+ const uint32_t flags)
+{
+ uint64_t lmt_status;
+ uint8_t j, off;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ return;
+ }
+ }
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+again0:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 14 + off,
+ slist[j + 1], segdw[j + 1] - 4);
+ off += ((segdw[j + 1] - 4) << 1);
+ vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 12 + off,
+ slist[j + 1], segdw[j + 1] - 3);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 8 + off,
+ slist[j + 1], segdw[j + 1] - 2);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again0;
+ j += 2;
+ } else {
+again1:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again1;
+ j += 1;
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* cnxk_nix_prefree_seg are written before LMTST.
*/
rte_io_wmb();
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint64_t seg_list[NIX_DESCS_PER_LOOP]
+ [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+ uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
+
+ /* Build mseg list for each packet individually. */
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
+ seg_list[j], &cmd0[j],
+ &cmd1[j], flags);
+ segdw[4] = 8;
+
+ /* Commit all changes to mbuf before LMTST. */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ rte_io_wmb();
+
+ cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
+ segdw, seg_list,
+ lmt_addr, io_addr,
+ flags);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* With ext header in the command we can no longer send
* all 4 packets together since LMTLINE is 128bytes.
* Split and Tx twice.
@@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
- if (unlikely(pkts_left))
- pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
- flags);
+ if (unlikely(pkts_left)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ pkts_left, cmd, flags);
+ else
+ pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
+ cmd, flags);
+ }
return pkts;
}
@@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
new file mode 100644
index 000000000..0256efd45
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | \
+ NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index aa8c7253f..361f7ce84 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
- 'cn9k_tx_vec.c')
+ 'cn9k_tx_vec.c',
+ 'cn9k_tx_vec_mseg.c')
# CN10K
sources += files('cn10k_ethdev.c',
'cn10k_rte_flow.c',
@@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
- 'cn10k_tx_vec.c')
+ 'cn10k_tx_vec.c',
+ 'cn10k_tx_vec_mseg.c')
deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']
deps += ['common_cnxk', 'mempool_cnxk']
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
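
As background for the segdw[] computation in the Tx loop above: NIX_NB_SEGS_TO_SEGDW()
simply indexes a 4-bit field of NIX_SEGDW_MAGIC (0x76654432210ULL) to get the descriptor
dword count for a given nb_segs, and the loop then adds one dword for the SEND HDR plus
one each for the EXT and TSTAMP headers when those offloads are enabled. A minimal
standalone check of the decode (hypothetical test program, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Same encoding as the patch: each nb_segs value maps to a 4-bit dword count. */
#define NIX_SEGDW_MAGIC		0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int
main(void)
{
	unsigned int segs;

	/* Prints 1, 2, 2, 3, 4, 4 for nb_segs = 1..6. */
	for (segs = 1; segs <= 6; segs++)
		printf("nb_segs=%u -> segdw=%u\n", segs,
		       (unsigned int)NIX_NB_SEGS_TO_SEGDW(segs));
	return 0;
}
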
* Re: [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine pbhagavatula
@ 2021-06-29 7:25 ` Nithin Dabilpuram
0 siblings, 0 replies; 93+ messages in thread
From: Nithin Dabilpuram @ 2021-06-29 7:25 UTC (permalink / raw)
To: pbhagavatula; +Cc: jerinj, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, dev
On Tue, Jun 29, 2021 at 01:11:43AM +0530, pbhagavatula@marvell.com wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add multi segment Tx vector routine.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> drivers/net/cnxk/cn10k_tx.c | 20 +-
> drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++--
> drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++
> drivers/net/cnxk/cn9k_tx.c | 20 +-
> drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++-
> drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++
> drivers/net/cnxk/meson.build | 6 +-
> 7 files changed, 709 insertions(+), 45 deletions(-)
> create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
> create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
> index d06879163..1f30bab59 100644
> --- a/drivers/net/cnxk/cn10k_tx.c
> +++ b/drivers/net/cnxk/cn10k_tx.c
> @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
> #undef T
> };
>
> - if (dev->scalar_ena)
> + const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
> +
> + NIX_TX_FASTPATH_MODES
> +#undef T
> + };
> +
> + if (dev->scalar_ena) {
> pick_tx_func(eth_dev, nix_eth_tx_burst);
> - else
> + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> + } else {
> pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
> -
> - if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> - pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> + pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
> + }
>
> rte_mb();
> }
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index 26797581e..532b53b31 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -42,6 +42,13 @@
> } \
> } while (0)
>
> +/* Encoded number of segments to number of dwords macro, each value of nb_segs
> + * is encoded as 4bits.
> + */
> +#define NIX_SEGDW_MAGIC 0x76654432210ULL
> +
> +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
> +
> #define LMT_OFF(lmt_addr, lmt_num, offset) \
> (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
>
> @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
> return data;
> }
>
> +static __rte_always_inline uint8_t
> +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
> +{
> + return ((flags & NIX_TX_NEED_EXT_HDR) ?
> + (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
> + 4);
> +}
> +
> static __rte_always_inline uint64_t
> cn10k_nix_tx_steor_vec_data(const uint16_t flags)
> {
> @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
> }
> }
>
> +static __rte_always_inline void
> +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
> + union nix_send_hdr_w0_u *sh,
> + union nix_send_sg_s *sg, const uint32_t flags)
> +{
> + struct rte_mbuf *m_next;
> + uint64_t *slist, sg_u;
> + uint16_t nb_segs;
> + int i = 1;
> +
> + sh->total = m->pkt_len;
> + /* Clear sg->u header before use */
> + sg->u &= 0xFC00000000000000;
> + sg_u = sg->u;
> + slist = &cmd[0];
> +
> + sg_u = sg_u | ((uint64_t)m->data_len);
> +
> + nb_segs = m->nb_segs - 1;
> + m_next = m->next;
> +
> + /* Set invert df if buffer is not to be freed by H/W */
> + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> + sg_u |= (cnxk_nix_prefree_seg(m) << 55);
> + /* Mark mempool object as "put" since it is freed by NIX */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> + if (!(sg_u & (1ULL << 55)))
> + __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> + rte_io_wmb();
> +#endif
> +
> + m = m_next;
> + /* Fill mbuf segments */
> + do {
> + m_next = m->next;
> + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
> + *slist = rte_mbuf_data_iova(m);
> + /* Set invert df if buffer is not to be freed by H/W */
> + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
> + /* Mark mempool object as "put" since it is freed by NIX
> + */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> + if (!(sg_u & (1ULL << (i + 55))))
> + __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> + rte_io_wmb();
> +#endif
> + slist++;
> + i++;
> + nb_segs--;
> + if (i > 2 && nb_segs) {
> + i = 0;
> + /* Next SG subdesc */
> + *(uint64_t *)slist = sg_u & 0xFC00000000000000;
> + sg->u = sg_u;
> + sg->segs = 3;
> + sg = (union nix_send_sg_s *)slist;
> + sg_u = sg->u;
> + slist++;
> + }
> + m = m_next;
> + } while (nb_segs);
> +
> + sg->u = sg_u;
> + sg->segs = i;
> +}
> +
> +static __rte_always_inline void
> +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
> + uint64x2_t *cmd1, const uint8_t segdw,
> + const uint32_t flags)
> +{
> + union nix_send_hdr_w0_u sh;
> + union nix_send_sg_s sg;
> +
> + if (m->nb_segs == 1) {
> + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> + sg.u = vgetq_lane_u64(cmd1[0], 0);
> + sg.u |= (cnxk_nix_prefree_seg(m) << 55);
> + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> + }
> +
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> + sg.u = vgetq_lane_u64(cmd1[0], 0);
> + if (!(sg.u & (1ULL << 55)))
> + __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> + rte_io_wmb();
> +#endif
> + return;
> + }
> +
> + sh.u = vgetq_lane_u64(cmd0[0], 0);
> + sg.u = vgetq_lane_u64(cmd1[0], 0);
> +
> + cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
> +
> + sh.sizem1 = segdw - 1;
> + cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
> + cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> +}
> +
> #define NIX_DESCS_PER_LOOP 4
> +
> +static __rte_always_inline uint8_t
> +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
> + uint64x2_t *cmd1, uint64x2_t *cmd2,
> + uint64x2_t *cmd3, uint8_t *segdw,
> + uint64_t *lmt_addr, __uint128_t *data128,
> + uint8_t *shift, const uint16_t flags)
> +{
> + uint8_t j, off, lmt_used;
> +
> + if (!(flags & NIX_TX_NEED_EXT_HDR) &&
> + !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> + /* No segments in 4 consecutive packets. */
> + if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
> + for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
> + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[0]);
> + vst1q_u64(lmt_addr + 2, cmd1[0]);
> + vst1q_u64(lmt_addr + 4, cmd0[1]);
> + vst1q_u64(lmt_addr + 6, cmd1[1]);
> + vst1q_u64(lmt_addr + 8, cmd0[2]);
> + vst1q_u64(lmt_addr + 10, cmd1[2]);
> + vst1q_u64(lmt_addr + 12, cmd0[3]);
> + vst1q_u64(lmt_addr + 14, cmd1[3]);
> +
> + *data128 |= ((__uint128_t)7) << *shift;
> + shift += 3;
> +
> + return 1;
> + }
> + }
> +
> + lmt_used = 0;
> + for (j = 0; j < NIX_DESCS_PER_LOOP;) {
> + /* Fit consecutive packets in same LMTLINE. */
> + if ((segdw[j] + segdw[j + 1]) <= 8) {
> + if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> + cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
> + &cmd0[j + 1],
> + &cmd1[j + 1],
> + segdw[j + 1], flags);
> + /* TSTAMP takes 4 each, no segs. */
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd2[j]);
> + vst1q_u64(lmt_addr + 4, cmd1[j]);
> + vst1q_u64(lmt_addr + 6, cmd3[j]);
> +
> + vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
> + vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
> + vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
> + vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
> + } else if (flags & NIX_TX_NEED_EXT_HDR) {
> + /* EXT header take 3 each, space for 2 segs.*/
> + cn10k_nix_prepare_mseg_vec(mbufs[j],
> + lmt_addr + 6,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd2[j]);
> + vst1q_u64(lmt_addr + 4, cmd1[j]);
> + off = segdw[j] - 3;
> + off <<= 1;
> + cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
> + lmt_addr + 12 + off,
> + &cmd0[j + 1],
> + &cmd1[j + 1],
> + segdw[j + 1], flags);
> + vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
> + vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
> + vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
> + } else {
> + cn10k_nix_prepare_mseg_vec(mbufs[j],
> + lmt_addr + 4,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd1[j]);
> + off = segdw[j] - 2;
> + off <<= 1;
> + cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
> + lmt_addr + 8 + off,
> + &cmd0[j + 1],
> + &cmd1[j + 1],
> + segdw[j + 1], flags);
> + vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
> + vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
> + }
> + *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
> + << *shift;
> + *shift += 3;
> + j += 2;
> + } else {
> + if ((flags & NIX_TX_NEED_EXT_HDR) &&
> + (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> + cn10k_nix_prepare_mseg_vec(mbufs[j],
> + lmt_addr + 6,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd2[j]);
> + vst1q_u64(lmt_addr + 4, cmd1[j]);
> + off = segdw[j] - 4;
> + off <<= 1;
> + vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
> + } else if (flags & NIX_TX_NEED_EXT_HDR) {
> + cn10k_nix_prepare_mseg_vec(mbufs[j],
> + lmt_addr + 6,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd2[j]);
> + vst1q_u64(lmt_addr + 4, cmd1[j]);
> + } else {
> + cn10k_nix_prepare_mseg_vec(mbufs[j],
> + lmt_addr + 4,
> + &cmd0[j], &cmd1[j],
> + segdw[j], flags);
> + vst1q_u64(lmt_addr, cmd0[j]);
> + vst1q_u64(lmt_addr + 2, cmd1[j]);
> + }
> + *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
> + *shift += 3;
> + j++;
> + }
> + lmt_used++;
> + lmt_addr += 16;
> + }
> +
> + return lmt_used;
> +}
> +
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
> cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
> - uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
> + uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
> uint64x2_t senddesc01_w0, senddesc23_w0;
> uint64x2_t senddesc01_w1, senddesc23_w1;
> uint16_t left, scalar, burst, i, lmt_id;
> @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64x2_t sendext01_w1, sendext23_w1;
> uint64x2_t sendmem01_w0, sendmem23_w0;
> uint64x2_t sendmem01_w1, sendmem23_w1;
> + uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
> uint64x2_t sgdesc01_w0, sgdesc23_w0;
> uint64x2_t sgdesc01_w1, sgdesc23_w1;
> struct cn10k_eth_txq *txq = tx_queue;
> @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64x2_t ltypes01, ltypes23;
> uint64x2_t xtmp128, ytmp128;
> uint64x2_t xmask01, xmask23;
> - uint8_t lnum;
> + uint8_t lnum, shift;
> + union wdata {
> + __uint128_t data128;
> + uint64_t data[2];
> + } wd;
>
> NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
> cn10k_nix_pkts_per_vec_brst(flags) :
> left;
> + if (flags & NIX_TX_MULTI_SEG_F) {
> + wd.data128 = 0;
> + shift = 16;
> + }
> lnum = 0;
> +
> for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
> + if (flags & NIX_TX_MULTI_SEG_F) {
> + struct rte_mbuf *m = tx_pkts[j];
> + uint8_t j;
[Nithin] I guess it moved out of the loop below while rebasing.
With this fixed,
Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
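
Presumably the fix is just to declare 'j' first and load the mbuf per iteration,
along these lines (sketch only; need_ext_hdr/need_tstamp stand in for the flag tests):

#include <rte_mbuf.h>

#define NIX_DESCS_PER_LOOP	4
#define NIX_SEGDW_MAGIC		0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

static inline void
nix_fill_segdw(struct rte_mbuf **tx_pkts, uint8_t *segdw,
	       int need_ext_hdr, int need_tstamp)
{
	uint8_t j;

	for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
		/* Load the mbuf for this descriptor inside the loop. */
		struct rte_mbuf *m = tx_pkts[j];

		/* Get dwords based on nb_segs. */
		segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
		/* Add dwords based on offloads. */
		segdw[j] += 1 + /* SEND HDR */
			    !!need_ext_hdr + !!need_tstamp;
	}
}
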
> [...]
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx vector routine
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
` (4 preceding siblings ...)
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
` (5 more replies)
5 siblings, 6 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add a multi-segment Rx vector routine: form the primary mbufs using the
vector path and switch to the scalar path when extracting segments.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
v5 Changes:
- Fix incorrect mbuf assignment.
v4 Changes:
- Split patches for easier merge.
- Rebase on dpdk-next-net-mrvl.
v3 Changes:
- Spell check.
drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
drivers/net/cnxk/meson.build | 2 ++
7 files changed, 157 insertions(+), 44 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
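
For readers skimming the diff: the heavy lifting is in nix_cqe_xtract_mseg(), which the
vector routine calls per mbuf in scalar code after the NEON path has already filled the
rearm data and lengths. The first SG word of the CQE packs the segment count and the
per-segment lengths; a small decode sketch matching the shifts used below (hypothetical
helper, not part of the patch):

#include <stdint.h>

/* Mirror of the SG word decode in nix_cqe_xtract_mseg(): bits [49:48] hold the
 * segment count and each 16-bit field from bit 0 upward holds one segment's
 * data_len.
 */
static inline uint16_t
nix_rx_sg_decode(uint64_t sg, uint16_t len[3])
{
	uint16_t nb_segs = (sg >> 48) & 0x3;

	len[0] = sg & 0xFFFF;
	len[1] = (sg >> 16) & 0xFFFF;
	len[2] = (sg >> 32) & 0xFFFF;
	return nb_segs;
}
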
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b..3a9fd7130 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa..5926ff7f4 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enabled, build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644
index 000000000..04d1e46c8
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index 0acedd0a1..d293d4eac 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 10ef5c690..5ae9e8195 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enabled; build the mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644
index 000000000..e46d8a474
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | \
+ NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index 2071d0dcb..aa8c7253f 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx.c',
'cn9k_rx_mseg.c',
'cn9k_rx_vec.c',
+ 'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx.c',
'cn10k_rx_mseg.c',
'cn10k_rx_vec.c',
+ 'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
'cn10k_tx_vec.c')
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 2/6] net/cnxk: enable ptp processing in vector Rx
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 3/6] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
` (4 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in vector Rx burst function, use vector path
for processing mbufs and finally switch to scalar when extracting
timestamp.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_ethdev.c | 1 -
drivers/net/cnxk/cn10k_rx.c | 5 +-
drivers/net/cnxk/cn10k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_rx_vec.c | 3 -
drivers/net/cnxk/cn9k_ethdev.c | 1 -
drivers/net/cnxk/cn9k_rx.c | 5 +-
drivers/net/cnxk/cn9k_rx.h | 124 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_rx_vec.c | 3 -
drivers/net/cnxk/cnxk_ethdev.h | 19 ++---
9 files changed, 232 insertions(+), 53 deletions(-)
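In short, the patch keeps the NEON flow for descriptor parsing and only drops to scalar loads for the 8-byte timestamp the hardware prepends to the packet data, trimming that prefix from the reported lengths. A minimal standalone sketch of those two steps (helper names here are illustrative, not the driver's API):

#include <stdint.h>
#include <string.h>

/* The Rx timestamp occupies the first 8 bytes of packet data and is
 * stored big-endian, so read it as a scalar and byte-swap it.
 */
static inline uint64_t
read_rx_timestamp(const uint8_t *pkt_data)
{
	uint64_t ts_be;

	memcpy(&ts_be, pkt_data, sizeof(ts_be));
	return __builtin_bswap64(ts_be);
}

/* With the timestamp offload enabled, the hardware-reported lengths
 * include that 8-byte prefix, so trim it before handing the mbuf up.
 */
static inline void
trim_timestamp_len(uint32_t *pkt_len, uint16_t *data_len, uint16_t ts_len)
{
	*pkt_len -= ts_len;
	*data_len -= ts_len;
}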
diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index b079edbd3..7caec6cf1 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 3a9fd7130..69e767ac3 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5926ff7f4..d9572b19e 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from big-endian to CPU byte order. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert the vector mask to a scalar mask */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 65ffa9784..93528a44f 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -11,9 +11,6 @@
struct rte_mbuf **rx_pkts, \
uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 994fdb7c3..115e67891 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
- dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index d293d4eac..7d9f1bd61 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 5ae9e8195..beb52f39d 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
static __rte_always_inline void
nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
- uint64_t rearm)
+ uint64_t rearm, const uint16_t flags)
{
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
return;
}
- mbuf->pkt_len = rx->pkt_lenm1 + 1;
- mbuf->data_len = sg & 0xFFFF;
+ mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+ mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
@@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
if (flag & NIX_RX_MULTI_SEG_F)
- nix_cqe_xtract_mseg(rx, mbuf, val);
+ nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
}
@@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
- (uint64_t *)((uint8_t *)mbuf + data_off)
- );
+ (flags & NIX_RX_MULTI_SEG_F),
+ (uint64_t *)((uint8_t *)mbuf
+ + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
mbuf3);
}
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+ const uint16x8_t len_off = {
+ 0, /* ptype 0:15 */
+ 0, /* ptype 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15*/
+ 0, /* pktlen 16:32 */
+ CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+ 0,
+ 0,
+ 0};
+ const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC,
+ RTE_PTYPE_L2_ETHER_TIMESYNC};
+ const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+ PKT_RX_IEEE1588_TMST |
+ rxq->tstamp->rx_tstamp_dynflag;
+ const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+ uint64x2_t ts01, ts23, mask;
+ uint64_t ts[4];
+ uint8_t res;
+
+ /* Subtract timesync length from total pkt length. */
+ f0 = vsubq_u16(f0, len_off);
+ f1 = vsubq_u16(f1, len_off);
+ f2 = vsubq_u16(f2, len_off);
+ f3 = vsubq_u16(f3, len_off);
+
+ /* Get the address of actual timestamp. */
+ ts01 = vaddq_u64(mbuf01, data_off);
+ ts23 = vaddq_u64(mbuf23, data_off);
+ /* Load timestamp from address. */
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 0),
+ ts01, 0);
+ ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+ 1),
+ ts01, 1);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 0),
+ ts23, 0);
+ ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+ 1),
+ ts23, 1);
+ /* Convert from big-endian to CPU byte order. */
+ ts01 = vrev64q_u8(ts01);
+ ts23 = vrev64q_u8(ts23);
+ /* Store timestamp into scalar for later use. */
+ ts[0] = vgetq_lane_u64(ts01, 0);
+ ts[1] = vgetq_lane_u64(ts01, 1);
+ ts[2] = vgetq_lane_u64(ts23, 0);
+ ts[3] = vgetq_lane_u64(ts23, 1);
+
+ /* Store timestamp into dynfield. */
+ *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+ ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+ ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+ ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+ ts[3];
+
+ /* Generate ptype mask to filter L2 ether timesync */
+ mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+ mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+ mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+ /* Match against L2 ether timesync. */
+ mask = vceqq_u32(mask, ptype);
+ /* Convert the vector mask to a scalar mask */
+ res = vaddvq_u32(vandq_u32(mask, and_mask));
+ res &= 0xF;
+
+ if (res) {
+ /* Fill in the ol_flags for any packets that
+ * matched.
+ */
+ ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+ ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+ ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+ ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+ /* Update Rxq timestamp with the latest
+ * timestamp.
+ */
+ rxq->tstamp->rx_ready = 1;
+ rxq->tstamp->rx_tstamp =
+ ts[31 - __builtin_clz(res)];
+ }
+ }
+
/* Form rearm_data with ol_flags */
rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer);
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer);
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer);
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer);
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c
index e61c2225c..ef5f771ef 100644
--- a/drivers/net/cnxk/cn9k_rx_vec.c
+++ b/drivers/net/cnxk/cn9k_rx_vec.c
@@ -9,9 +9,6 @@
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- /* TSTMP is not supported by vector */ \
- if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F) \
- return 0; \
return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
(flags)); \
}
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 67b1f4253..4eead0390 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -136,13 +136,12 @@ struct cnxk_eth_qconf {
};
struct cnxk_timesync_info {
+ uint8_t rx_ready;
+ uint64_t rx_tstamp;
uint64_t rx_tstamp_dynflag;
+ int tstamp_dynfield_offset;
rte_iova_t tx_tstamp_iova;
uint64_t *tx_tstamp;
- uint64_t rx_tstamp;
- int tstamp_dynfield_offset;
- uint8_t tx_ready;
- uint8_t rx_ready;
} __plt_cache_aligned;
struct cnxk_eth_dev {
@@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf,
static __rte_always_inline void
cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf,
- struct cnxk_timesync_info *tstamp, bool ts_enable,
+ struct cnxk_timesync_info *tstamp,
+ const uint8_t ts_enable, const uint8_t mseg_enable,
uint64_t *tstamp_ptr)
{
- if (ts_enable &&
- (mbuf->data_off ==
- RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) {
- mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ if (ts_enable) {
+ if (!mseg_enable) {
+ mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+ }
/* Reading the rx timestamp inserted by CGX, viz at
* starting of the packet data.
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 3/6] net/cnxk: enable VLAN processing in vector Tx
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 4/6] net/cnxk: enable ptp " pbhagavatula
` (3 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable VLAN offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 3 +-
drivers/net/cnxk/cn10k_tx.h | 125 +++++++++++++++++++++++++++----
drivers/net/cnxk/cn10k_tx_vec.c | 3 +-
drivers/net/cnxk/cn9k_tx.c | 3 +-
drivers/net/cnxk/cn9k_tx.h | 128 ++++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 3 +-
6 files changed, 227 insertions(+), 38 deletions(-)
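The vector path builds the send-ext VLAN word per packet: the outer TCI lands at bit offset 8 (VLAN0), the inner TCI at bit offset 32 (VLAN1), and the insert-enable bits 48/49 are set only when the matching ol_flags are present. A scalar sketch of that packing, with bit positions taken from the patch (treat it as illustrative, not the exact descriptor definition):

#include <stdint.h>
#include <stdbool.h>

#define VLAN0_INS_ENA (1ULL << 48) /* outer VLAN insert enable */
#define VLAN1_INS_ENA (1ULL << 49) /* inner VLAN insert enable */

static inline uint64_t
build_vlan_ext_word(uint16_t outer_tci, uint16_t inner_tci,
		    bool insert_outer, bool insert_inner)
{
	uint64_t w1 = 0;

	w1 |= (uint64_t)outer_tci << 8;  /* VLAN0 (outer) TCI */
	w1 |= (uint64_t)inner_tci << 32; /* VLAN1 (inner) TCI */
	if (insert_outer)
		w1 |= VLAN0_INS_ENA;
	if (insert_inner)
		w1 |= VLAN1_INS_ENA;
	return w1;
}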
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 18694dc70..05bc163a4 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8b1446f25..1e1697858 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
- RTE_SET_USED(flags);
- /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
- return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
+ return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
+ << ROC_LMT_LINES_PER_CORE_LOG2;
+}
+
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line(const uint16_t flags)
+{
+ return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
}
static __rte_always_inline uint64_t
@@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
- const uint64_t dw_m1 = 0x7;
+ const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
uint64_t data;
- RTE_SET_USED(flags);
/* This will be moved to addr area */
data = dw_m1;
/* 15 vector sizes for single seg */
@@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
sgdesc23_w0 = sgdesc01_w0;
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Get LMT base address and LMT ID as lcore id */
ROC_LMT_BASE_ID_GET(laddr, lmt_id);
left = pkts;
@@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QinQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1381,16 +1449,41 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- /* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
- lnum += 1;
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ lnum += 1;
+ } else {
+ /* Store the prepared send desc to LMT lines */
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
+ lnum += 1;
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 7453f3bc9..beb5c649b 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index b80260607..4b43cdaff 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
if (dev->scalar_ena ||
(dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+ (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 1899d6670..d5715bb52 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -552,10 +552,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
- uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+ uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+ cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
+ uint64x2_t sendext01_w0, sendext23_w0;
+ uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
senddesc01_w1 = vdupq_n_u64(0);
senddesc23_w1 = senddesc01_w1;
- sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
- sgdesc23_w0 = sgdesc01_w0;
+
+ /* Load command defaults into vector variables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendext23_w0 = sendext01_w0;
+ sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+ sendext23_w1 = sendext01_w1;
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
+ sgdesc23_w0 = sgdesc01_w0;
+ } else {
+ sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sgdesc23_w0 = sgdesc01_w0;
+ }
for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
@@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
+ /* Clear vlan enables. */
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ sendext01_w1 = vbicq_u64(sendext01_w1,
+ vdupq_n_u64(0x3FFFF00FFFF00));
+ sendext23_w1 = sendext01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
+ if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+ /* Tx ol_flag for vlan. */
+ const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+ /* Bit enable for VLAN1 */
+ const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+ /* Tx ol_flag for QinQ. */
+ const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+ /* Bit enable for VLAN0 */
+ const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+ /* Load vlan values from packet. outer is VLAN 0 */
+ uint64x2_t ext01 = {
+ ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+ };
+ uint64x2_t ext23 = {
+ ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+ ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+ ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+ };
+
+ /* Get ol_flags of the packets. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* ORR vlan outer/inner values into cmd. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+ /* Test for offload enable bits and generate masks. */
+ xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(xtmp128, olq),
+ mlq));
+ ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+ mlv),
+ vandq_u64(vtstq_u64(ytmp128, olq),
+ mlq));
+
+ /* Set vlan enable bits into cmd based on mask. */
+ sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+ sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
- do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd1[0]);
- vst1q_u64(lmt_addr + 4, cmd0[1]);
- vst1q_u64(lmt_addr + 6, cmd1[1]);
- vst1q_u64(lmt_addr + 8, cmd0[2]);
- vst1q_u64(lmt_addr + 10, cmd1[2]);
- vst1q_u64(lmt_addr + 12, cmd0[3]);
- vst1q_u64(lmt_addr + 14, cmd1[3]);
- lmt_status = roc_lmt_submit_ldeor(io_addr);
- } while (lmt_status == 0);
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+ cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+ cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+ cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+ }
+
+ if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* With ext header in the command we can no longer send
+ * all 4 packets together since LMTLINE is 128 bytes.
+ * Split and Tx twice.
+ */
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ do {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ } else {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+ }
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index a6e7c9e54..5842facb5 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -14,8 +14,7 @@
uint64_t cmd[sz]; \
\
/* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
- (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
+ if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
(flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 4/6] net/cnxk: enable ptp processing in vector Tx
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 2/6] net/cnxk: enable ptp processing in vector Rx pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 3/6] net/cnxk: enable VLAN processing in vector Tx pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 5/6] net/cnxk: enable TSO " pbhagavatula
` (2 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable PTP offload in the vector Tx burst function. Since we can
no longer use a single LMT line for a burst of 4, split the LMT line
into two and transmit twice.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 4 +-
drivers/net/cnxk/cn10k_tx.h | 109 +++++++++++++++++++++++++++-----
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 4 +-
drivers/net/cnxk/cn9k_tx.h | 105 ++++++++++++++++++++++++++----
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 192 insertions(+), 40 deletions(-)
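With the extension and timestamp words included, each packet needs eight descriptor dwords (128 bytes for two packets), so the four-packet burst is written to the LMT line and submitted in two halves. A simplified sketch of that submit pattern, assuming the 8-dword-per-packet layout; lmt_submit() stands in for the real LDEOR-based submit, which can fail and requires the stores to be replayed:

#include <stdint.h>
#include <string.h>

static void
xmit_four_in_two_bursts(uint8_t *lmt_addr, const uint64_t desc[4][8],
			int (*lmt_submit)(void))
{
	/* First half: packets 0 and 1 fill the 128-byte LMT line. */
	do {
		memcpy(lmt_addr, desc[0], 64);
		memcpy(lmt_addr + 64, desc[1], 64);
	} while (lmt_submit() == 0);

	/* Second half: packets 2 and 3, written to the same line. */
	do {
		memcpy(lmt_addr, desc[2], 64);
		memcpy(lmt_addr + 64, desc[3], 64);
	} while (lmt_submit() == 0);
}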
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 05bc163a4..c4c3e6570 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 1e1697858..8af6799ff 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
- return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
+ return (flags & NIX_TX_NEED_EXT_HDR) ?
+ ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
+ 8;
}
static __rte_always_inline uint64_t
@@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = sendext01_w0;
sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
sendext23_w1 = sendext01_w1;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
+ sendmem23_w1 = sendmem01_w1;
+ }
}
/* Get LMT base address and LMT ID as lcore id */
@@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB */
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1458,19 +1517,39 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
- lnum += 1;
- vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
- vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
- vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
+ } else {
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+ lnum += 1;
+ vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+ vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+ }
lnum += 1;
} else {
/* Store the prepared send desc to LMT lines */
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index beb5c649b..0b4a4c7ba 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 4b43cdaff..c32681ed4 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,9 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena ||
- (dev->tx_offload_flags &
- (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+ if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index d5715bb52..cb574a1c1 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -553,12 +553,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
- cmd2[NIX_DESCS_PER_LOOP];
+ cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+ uint64x2_t sendmem01_w0, sendmem23_w0;
+ uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn9k_eth_txq *txq = tx_queue;
@@ -597,6 +599,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
sgdesc23_w0 = sgdesc01_w0;
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
+ sendmem23_w0 = sendmem01_w0;
+ sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
+ sendmem23_w1 = sendmem01_w1;
+ }
} else {
sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
sgdesc23_w0 = sgdesc01_w0;
@@ -618,6 +626,17 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = sendext01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Reset send mem alg to SETTSTMP from SUB */
+ sendmem01_w0 = vbicq_u64(sendmem01_w0,
+ vdupq_n_u64(BIT_ULL(59)));
+ /* Reset send mem address to default. */
+ sendmem01_w1 =
+ vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+ sendmem23_w0 = sendmem01_w0;
+ sendmem23_w1 = sendmem01_w1;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1229,6 +1248,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ /* Tx ol_flag for timestamp. */
+ const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+ PKT_TX_IEEE1588_TMST};
+ /* Set send mem alg to SUB. */
+ const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+ /* Increment send mem address by 8. */
+ const uint64x2_t addr = {0x8, 0x8};
+
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Check if timestamp is requested and generate inverted
+ * mask as we need not make any changes to default cmd
+ * value.
+ */
+ xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
+ ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
+
+ /* Change send mem address to an 8 byte offset when
+ * TSTMP is disabled.
+ */
+ sendmem01_w1 = vaddq_u64(sendmem01_w1,
+ vandq_u64(xtmp128, addr));
+ sendmem23_w1 = vaddq_u64(sendmem23_w1,
+ vandq_u64(ytmp128, addr));
+ /* Change send mem alg to SUB when TSTMP is disabled. */
+ sendmem01_w0 = vorrq_u64(sendmem01_w0,
+ vandq_u64(xtmp128, alg));
+ sendmem23_w0 = vorrq_u64(sendmem23_w0,
+ vandq_u64(ytmp128, alg));
+
+ cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
+ cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
+ cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
@@ -1327,22 +1384,44 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* Split and Tx twice.
*/
do {
- vst1q_u64(lmt_addr, cmd0[0]);
- vst1q_u64(lmt_addr + 2, cmd2[0]);
- vst1q_u64(lmt_addr + 4, cmd1[0]);
- vst1q_u64(lmt_addr + 6, cmd0[1]);
- vst1q_u64(lmt_addr + 8, cmd2[1]);
- vst1q_u64(lmt_addr + 10, cmd1[1]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd3[0]);
+ vst1q_u64(lmt_addr + 8, cmd0[1]);
+ vst1q_u64(lmt_addr + 10, cmd2[1]);
+ vst1q_u64(lmt_addr + 12, cmd1[1]);
+ vst1q_u64(lmt_addr + 14, cmd3[1]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd2[0]);
+ vst1q_u64(lmt_addr + 4, cmd1[0]);
+ vst1q_u64(lmt_addr + 6, cmd0[1]);
+ vst1q_u64(lmt_addr + 8, cmd2[1]);
+ vst1q_u64(lmt_addr + 10, cmd1[1]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
do {
- vst1q_u64(lmt_addr, cmd0[2]);
- vst1q_u64(lmt_addr + 2, cmd2[2]);
- vst1q_u64(lmt_addr + 4, cmd1[2]);
- vst1q_u64(lmt_addr + 6, cmd0[3]);
- vst1q_u64(lmt_addr + 8, cmd2[3]);
- vst1q_u64(lmt_addr + 10, cmd1[3]);
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd3[2]);
+ vst1q_u64(lmt_addr + 8, cmd0[3]);
+ vst1q_u64(lmt_addr + 10, cmd2[3]);
+ vst1q_u64(lmt_addr + 12, cmd1[3]);
+ vst1q_u64(lmt_addr + 14, cmd3[3]);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[2]);
+ vst1q_u64(lmt_addr + 2, cmd2[2]);
+ vst1q_u64(lmt_addr + 4, cmd1[2]);
+ vst1q_u64(lmt_addr + 6, cmd0[3]);
+ vst1q_u64(lmt_addr + 8, cmd2[3]);
+ vst1q_u64(lmt_addr + 10, cmd1[3]);
+ }
lmt_status = roc_lmt_submit_ldeor(io_addr);
} while (lmt_status == 0);
} else {
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 5842facb5..9ade66db2 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,9 +13,8 @@
{ \
uint64_t cmd[sz]; \
\
- /* VLAN, TSTMP, TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
- (flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* TSO is not supported by vec */ \
+ if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 5/6] net/cnxk: enable TSO processing in vector Tx
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
` (2 preceding siblings ...)
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 4/6] net/cnxk: enable ptp " pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 6/6] net/cnxk: add multi seg Tx vector routine pbhagavatula
2021-06-29 16:20 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " Jerin Jacob
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enable TSO offload in vector Tx burst function.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 97 +++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn10k_tx_vec.c | 5 +-
drivers/net/cnxk/cn9k_tx.c | 2 +-
drivers/net/cnxk/cn9k_tx.h | 94 ++++++++++++++++++++++++++++++++
drivers/net/cnxk/cn9k_tx_vec.c | 5 +-
6 files changed, 199 insertions(+), 6 deletions(-)
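For TSO the vector path spills the send header/ext words to the stack, patches the LSO fields per packet in scalar code, and reloads them into the vectors. A stripped-down scalar view of that per-packet setup (field names are simplified stand-ins for the NIX send descriptor bit-fields):

#include <stdint.h>
#include <stdbool.h>

struct tso_fields {
	uint16_t lso_sb;  /* LSO start byte: headers replicated per segment */
	uint16_t lso_mps; /* max payload size per segment (MSS) */
	bool lso_ena;     /* LSO enable */
};

static inline void
prepare_tso(struct tso_fields *f, uint16_t hdr_len_upto_l4,
	    uint16_t l4_len, uint16_t tso_segsz)
{
	f->lso_sb = hdr_len_upto_l4 + l4_len; /* segmentation starts after L4 */
	f->lso_mps = tso_segsz;               /* MSS taken from the mbuf */
	f->lso_ena = true;
}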
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index c4c3e6570..d06879163 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8af6799ff..26797581e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ const uint64_t flags, const uint64_t lso_tun_fmt)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+ uint8_t shift = is_udp_tun ? 32 : 0;
+
+ shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
+ shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+
+ w0->lso_format = (lso_tun_fmt >> shift);
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
senddesc23_w0 = senddesc01_w0;
@@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ const uint64_t lso_fmt = txq->lso_tun_fmt;
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn10k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
+
+ cn10k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
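In the cn10k variant above, the tunnel LSO format is not derived by fixed index arithmetic; the shift composed from the UDP-tunnel, outer-IPv6 and inner-IPv6 bits indexes a 64-bit lso_tun_fmt word that appears to pack one format id per 8 bits. A sketch of that lookup with a hypothetical helper name (the shift arithmetic mirrors the hunk above):

#include <stdint.h>

/* Pick the tunnel LSO format id out of a 64-bit word packing the ids,
 * indexed by {udp_tun, outer_ipv6, inner_ipv6}. Helper name is
 * hypothetical; only the shift composition is taken from the patch.
 */
static inline uint8_t
tun_lso_format(uint64_t lso_tun_fmt, int udp_tun, int outer_v6, int inner_v6)
{
	unsigned int shift = udp_tun ? 32 : 0;

	shift += (unsigned int)(!!outer_v6) << 4;
	shift += (unsigned int)(!!inner_v6) << 3;
	return (uint8_t)(lso_tun_fmt >> shift);
}

Keeping all the ids in one register value avoids a memory lookup per TSO packet in the vector path.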
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 0b4a4c7ba..34e373750 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
(flags)); \
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index c32681ed4..735e21cc6 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,7 +66,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+ if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index cb574a1c1..dca732a9f 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -545,6 +545,43 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
#if defined(RTE_ARCH_ARM64)
+static __rte_always_inline void
+cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ uint64_t flags)
+{
+ uint16_t lso_sb;
+ uint64_t mask;
+
+ if (!(ol_flags & PKT_TX_TCP_SEG))
+ return;
+
+ mask = -(!w1->il3type);
+ lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+ w0->u |= BIT(14);
+ w0->lso_sb = lso_sb;
+ w0->lso_mps = m->tso_segsz;
+ w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+ w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+ /* Handle tunnel tso */
+ if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+ (ol_flags & PKT_TX_TUNNEL_MASK)) {
+ const uint8_t is_udp_tun =
+ (CNXK_NIX_UDP_TUN_BITMASK >>
+ ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+ 0x1;
+
+ w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+ /* Update format for UDP tunneled packet */
+ w0->lso_format += is_udp_tun ? 2 : 6;
+
+ w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+ }
+}
+
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -580,6 +617,12 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ for (i = 0; i < pkts; i++)
+ cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+ }
+
/* Lets commit any changes in the packet here as no further changes
* to the packet will be done unless no fast free is enabled.
*/
@@ -637,6 +680,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ /* Clear the LSO enable bit. */
+ sendext01_w0 = vbicq_u64(sendext01_w0,
+ vdupq_n_u64(BIT_ULL(14)));
+ sendext23_w0 = sendext01_w0;
+ }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1286,6 +1336,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
+ if (flags & NIX_TX_OFFLOAD_TSO_F) {
+ uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+ uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+ /* Extract SD W1 as we need to set L4 types. */
+ vst1q_u64(sd_w1, senddesc01_w1);
+ vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+ /* Extract SX W0 as we need to set LSO fields. */
+ vst1q_u64(sx_w0, sendext01_w0);
+ vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+ /* Extract ol_flags. */
+ xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+ /* Prepare individual mbufs. */
+ cn9k_nix_prepare_tso(tx_pkts[0],
+ (union nix_send_hdr_w1_u *)&sd_w1[0],
+ (union nix_send_ext_w0_u *)&sx_w0[0],
+ vgetq_lane_u64(xtmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[1],
+ (union nix_send_hdr_w1_u *)&sd_w1[1],
+ (union nix_send_ext_w0_u *)&sx_w0[1],
+ vgetq_lane_u64(xtmp128, 1), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[2],
+ (union nix_send_hdr_w1_u *)&sd_w1[2],
+ (union nix_send_ext_w0_u *)&sx_w0[2],
+ vgetq_lane_u64(ytmp128, 0), flags);
+
+ cn9k_nix_prepare_tso(tx_pkts[3],
+ (union nix_send_hdr_w1_u *)&sd_w1[3],
+ (union nix_send_ext_w0_u *)&sx_w0[3],
+ vgetq_lane_u64(ytmp128, 1), flags);
+
+ senddesc01_w1 = vld1q_u64(sd_w1);
+ senddesc23_w1 = vld1q_u64(sd_w1 + 2);
+
+ sendext01_w0 = vld1q_u64(sx_w0);
+ sendext23_w0 = vld1q_u64(sx_w0 + 2);
+ }
+
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
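Neither vector loop can patch per-packet LSO state while the descriptor words live in NEON registers, so both spill the send header and extension words to scalar arrays with vst1q_u64(), run the per-mbuf prepare_tso helper on each lane, and reload with vld1q_u64(). A generic sketch of that spill/patch/reload pattern, with an illustrative callback rather than the driver helper:

#include <arm_neon.h>
#include <stdint.h>

/* Spill two packed 64-bit descriptor words, patch each lane with a
 * per-packet callback, then reload the vector register. Sketch only;
 * the driver patches senddesc/sendext words with prepare_tso().
 */
static inline uint64x2_t
patch_lanes(uint64x2_t w, uint64_t (*patch)(uint64_t lane, void *ctx),
	    void *ctx0, void *ctx1)
{
	uint64_t tmp[2];

	vst1q_u64(tmp, w);
	tmp[0] = patch(tmp[0], ctx0);
	tmp[1] = patch(tmp[1], ctx1);
	return vld1q_u64(tmp);
}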
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index 9ade66db2..56a3e2514 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -13,8 +13,9 @@
{ \
uint64_t cmd[sz]; \
\
- /* TSO is not supported by vec */ \
- if ((flags) & NIX_TX_OFFLOAD_TSO_F) \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
(flags)); \
--
2.17.1

* [dpdk-dev] [PATCH v5 6/6] net/cnxk: add multi seg Tx vector routine
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
` (3 preceding siblings ...)
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 5/6] net/cnxk: enable TSO " pbhagavatula
@ 2021-06-29 7:44 ` pbhagavatula
2021-06-29 16:20 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " Jerin Jacob
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 7:44 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add multi segment Tx vector routine.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/net/cnxk/cn10k_tx.c | 20 +-
drivers/net/cnxk/cn10k_tx.h | 389 +++++++++++++++++++++++++--
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/cn9k_tx.c | 20 +-
drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++-
drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++
drivers/net/cnxk/meson.build | 6 +-
7 files changed, 710 insertions(+), 45 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c
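One detail worth noting in the cn10k_tx.h changes below: NIX_NB_SEGS_TO_SEGDW() packs a small lookup table into a 64-bit constant, four bits per entry, so converting a segment count to a descriptor dword count is a shift and a mask instead of a memory access. A standalone sketch using the constant from the patch (the per-entry values follow the SG sub-descriptor layout):

#include <assert.h>
#include <stdint.h>

/* Nibble-packed lookup: entry i sits in bits [4*i, 4*i + 3], so the
 * table lives in an immediate. Constant copied from the patch below.
 */
#define SEGDW_MAGIC 0x76654432210ULL
#define NB_SEGS_TO_SEGDW(x) ((SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int main(void)
{
	assert(NB_SEGS_TO_SEGDW(1) == 1);
	assert(NB_SEGS_TO_SEGDW(3) == 2);
	assert(NB_SEGS_TO_SEGDW(6) == 4);
	return 0;
}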
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index d06879163..1f30bab59 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 26797581e..eb148b8e7 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -42,6 +42,13 @@
} \
} while (0)
+/* Encoded number of segments to number of dwords macro, each value of nb_segs
+ * is encoded as 4bits.
+ */
+#define NIX_SEGDW_MAGIC 0x76654432210ULL
+
+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
+
#define LMT_OFF(lmt_addr, lmt_num, offset) \
(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
return data;
}
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
+{
+ return ((flags & NIX_TX_NEED_EXT_HDR) ?
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
+ 4);
+}
+
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+}
+
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint8_t segdw,
+ const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return;
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ sh.sizem1 = segdw - 1;
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline uint8_t
+cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, uint64x2_t *cmd2,
+ uint64x2_t *cmd3, uint8_t *segdw,
+ uint64_t *lmt_addr, __uint128_t *data128,
+ uint8_t *shift, const uint16_t flags)
+{
+ uint8_t j, off, lmt_used;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+
+ *data128 |= ((__uint128_t)7) << *shift;
+ *shift += 3;
+
+ return 1;
+ }
+ }
+
+ lmt_used = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+ if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ /* TSTAMP takes 4 each, no segs. */
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ vst1q_u64(lmt_addr + 6, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
+ vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ /* EXT header take 3 each, space for 2 segs.*/
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 3;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 12 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ off = segdw[j] - 2;
+ off <<= 1;
+ cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
+ lmt_addr + 8 + off,
+ &cmd0[j + 1],
+ &cmd1[j + 1],
+ segdw[j + 1], flags);
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
+ << *shift;
+ *shift += 3;
+ j += 2;
+ } else {
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ off = segdw[j] - 4;
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 6,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ } else {
+ cn10k_nix_prepare_mseg_vec(mbufs[j],
+ lmt_addr + 4,
+ &cmd0[j], &cmd1[j],
+ segdw[j], flags);
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ }
+ *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
+ *shift += 3;
+ j++;
+ }
+ lmt_used++;
+ lmt_addr += 16;
+ }
+
+ return lmt_used;
+}
+
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
- uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
+ uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
@@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sendmem01_w0, sendmem23_w0;
uint64x2_t sendmem01_w1, sendmem23_w1;
+ uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64x2_t ltypes01, ltypes23;
uint64x2_t xtmp128, ytmp128;
uint64x2_t xmask01, xmask23;
- uint8_t lnum;
+ uint8_t lnum, shift;
+ union wdata {
+ __uint128_t data128;
+ uint64_t data[2];
+ } wd;
NIX_XMIT_FC_OR_RETURN(txq, pkts);
@@ -798,8 +1055,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
cn10k_nix_pkts_per_vec_brst(flags) :
left;
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ wd.data128 = 0;
+ shift = 16;
+ }
lnum = 0;
+
for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint8_t j;
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ struct rte_mbuf *m = tx_pkts[j];
+
+ /* Get dwords based on nb_segs. */
+ segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+ /* Add dwords based on offloads. */
+ segdw[j] += 1 + /* SEND HDR */
+ !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ /* Check if there are enough LMTLINES for this loop */
+ if (lnum + 4 > 32) {
+ uint8_t ldwords_con = 0, lneeded = 0;
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+ ldwords_con += segdw[j];
+ if (ldwords_con > 8) {
+ lneeded += 1;
+ ldwords_con = segdw[j];
+ }
+ }
+ lneeded += 1;
+ if (lnum + lneeded > 32) {
+ burst = i;
+ break;
+ }
+ }
+ }
/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
senddesc01_w0 =
vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
@@ -1527,7 +1820,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1567,7 +1861,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
(void **)&mbuf3, 1, 0);
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1612,7 +1906,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint8_t j;
+
+ segdw[4] = 8;
+ j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
+ cmd2, cmd3, segdw,
+ (uint64_t *)
+ LMT_OFF(laddr, lnum,
+ 0),
+ &wd.data128, &shift,
+ flags);
+ lnum += j;
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* Store the prepared send desc to LMT lines */
if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
@@ -1664,34 +1970,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] >>= 16;
+
/* Trigger LMTST */
if (lnum > 16) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= (15ULL << 12);
- data |= (uint64_t)lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= (15ULL << 12);
+ wd.data[0] |= (uint64_t)lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 17)) << 12;
- data |= (uint64_t)(lmt_id + 16);
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[1] & 0x7) << 4;
+ wd.data[1] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[1] <<= 16;
+
+ wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
+ wd.data[1] |= (uint64_t)(lmt_id + 16);
/* STEOR1 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[1], pa);
} else if (lnum) {
- data = cn10k_nix_tx_steor_vec_data(flags);
- pa = io_addr | (data & 0x7) << 4;
- data &= ~0x7ULL;
- data |= ((uint64_t)(lnum - 1)) << 12;
- data |= lmt_id;
+ if (!(flags & NIX_TX_MULTI_SEG_F))
+ wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
+
+ pa = io_addr | (wd.data[0] & 0x7) << 4;
+ wd.data[0] &= ~0x7ULL;
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ wd.data[0] <<= 16;
+
+ wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
+ wd.data[0] |= lmt_id;
/* STEOR0 */
- roc_lmt_submit_steorl(data, pa);
+ roc_lmt_submit_steorl(wd.data[0], pa);
}
left -= burst;
@@ -1699,9 +2026,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (left)
goto again;
- if (unlikely(scalar))
- pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
- flags);
+ if (unlikely(scalar)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ scalar, cmd, flags);
+ else
+ pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
+ cmd, flags);
+ }
return pkts;
}
@@ -1866,7 +2198,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
- void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
NIX_TX_FASTPATH_MODES
#undef T
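Before each unrolled iteration, the loop above checks whether the remaining LMT lines (out of 32) can hold the next four packets by greedily packing their descriptor dword counts against the per-line budget of 8 used in cn10k_nix_prep_lmt_mseg_vector(), and truncates the burst if they cannot. A minimal sketch of that line-count estimate (function name is illustrative):

#include <stdint.h>

/* Estimate the LMT lines needed when descriptors are packed greedily
 * with a budget of 8 dwords per line -- the same accounting the loop
 * above performs when it is close to exhausting the 32 lines.
 */
static inline uint8_t
lmt_lines_needed(const uint8_t *segdw, int n)
{
	uint8_t used = 0, lines = 0;
	int i;

	for (i = 0; i < n; i++) {
		used += segdw[i];
		if (used > 8) {		/* start a new line */
			lines++;
			used = segdw[i];
		}
	}
	return lines + 1;		/* line currently being filled */
}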
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
new file mode 100644
index 000000000..1fad81dba
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn10k_nix_xmit_pkts_vector( \
+ tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 735e21cc6..763f9a14f 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
#undef T
};
- if (dev->scalar_ena)
+ const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,
+
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
- else
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
- if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
- pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+ if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+ pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+ }
rte_mb();
}
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index dca732a9f..ed65cd351 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
}
}
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+ union nix_send_hdr_w0_u *sh,
+ union nix_send_sg_s *sg, const uint32_t flags)
+{
+ struct rte_mbuf *m_next;
+ uint64_t *slist, sg_u;
+ uint16_t nb_segs;
+ uint64_t segdw;
+ int i = 1;
+
+ sh->total = m->pkt_len;
+ /* Clear sg->u header before use */
+ sg->u &= 0xFC00000000000000;
+ sg_u = sg->u;
+ slist = &cmd[0];
+
+ sg_u = sg_u | ((uint64_t)m->data_len);
+
+ nb_segs = m->nb_segs - 1;
+ m_next = m->next;
+
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+ /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+
+ m = m_next;
+ /* Fill mbuf segments */
+ do {
+ m_next = m->next;
+ sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+ *slist = rte_mbuf_data_iova(m);
+ /* Set invert df if buffer is not to be freed by H/W */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+ /* Mark mempool object as "put" since it is freed by NIX
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ if (!(sg_u & (1ULL << (i + 55))))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ slist++;
+ i++;
+ nb_segs--;
+ if (i > 2 && nb_segs) {
+ i = 0;
+ /* Next SG subdesc */
+ *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+ sg->u = sg_u;
+ sg->segs = 3;
+ sg = (union nix_send_sg_s *)slist;
+ sg_u = sg->u;
+ slist++;
+ }
+ m = m_next;
+ } while (nb_segs);
+
+ sg->u = sg_u;
+ sg->segs = i;
+ segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
+
+ segdw += 2;
+ /* Roundup extra dwords to multiple of 2 */
+ segdw = (segdw >> 1) + (segdw & 0x1);
+ /* Default dwords */
+ segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ sh->sizem1 = segdw - 1;
+
+ return segdw;
+}
+
+static __rte_always_inline uint8_t
+cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
+ uint64x2_t *cmd1, const uint32_t flags)
+{
+ union nix_send_hdr_w0_u sh;
+ union nix_send_sg_s sg;
+ uint8_t ret;
+
+ if (m->nb_segs == 1) {
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ sg.u |= (cnxk_nix_prefree_seg(m) << 55);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ }
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+ if (!(sg.u & (1ULL << 55)))
+ __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+ rte_io_wmb();
+#endif
+ return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
+ !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
+ }
+
+ sh.u = vgetq_lane_u64(cmd0[0], 0);
+ sg.u = vgetq_lane_u64(cmd1[0], 0);
+
+ ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
+
+ cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
+ cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
+ return ret;
+}
+
#define NIX_DESCS_PER_LOOP 4
+
+static __rte_always_inline void
+cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
+ uint64x2_t *cmd2, uint64x2_t *cmd3,
+ uint8_t *segdw,
+ uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
+ uint64_t *lmt_addr, rte_iova_t io_addr,
+ const uint32_t flags)
+{
+ uint64_t lmt_status;
+ uint8_t j, off;
+
+ if (!(flags & NIX_TX_NEED_EXT_HDR) &&
+ !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ /* No segments in 4 consecutive packets. */
+ if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
+ do {
+ vst1q_u64(lmt_addr, cmd0[0]);
+ vst1q_u64(lmt_addr + 2, cmd1[0]);
+ vst1q_u64(lmt_addr + 4, cmd0[1]);
+ vst1q_u64(lmt_addr + 6, cmd1[1]);
+ vst1q_u64(lmt_addr + 8, cmd0[2]);
+ vst1q_u64(lmt_addr + 10, cmd1[2]);
+ vst1q_u64(lmt_addr + 12, cmd0[3]);
+ vst1q_u64(lmt_addr + 14, cmd1[3]);
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ } while (lmt_status == 0);
+
+ return;
+ }
+ }
+
+ for (j = 0; j < NIX_DESCS_PER_LOOP;) {
+ /* Fit consecutive packets in same LMTLINE. */
+ if ((segdw[j] + segdw[j + 1]) <= 8) {
+again0:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+
+ vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 14 + off,
+ slist[j + 1], segdw[j + 1] - 4);
+ off += ((segdw[j + 1] - 4) << 1);
+ vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
+ vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 12 + off,
+ slist[j + 1], segdw[j + 1] - 3);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
+ vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
+ roc_lmt_mov_seg(lmt_addr + 8 + off,
+ slist[j + 1], segdw[j + 1] - 2);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again0;
+ j += 2;
+ } else {
+again1:
+ if ((flags & NIX_TX_NEED_EXT_HDR) &&
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 4;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ off <<= 1;
+ vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd2[j]);
+ vst1q_u64(lmt_addr + 4, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 3;
+ roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
+ } else {
+ vst1q_u64(lmt_addr, cmd0[j]);
+ vst1q_u64(lmt_addr + 2, cmd1[j]);
+ /* Copy segs */
+ off = segdw[j] - 2;
+ roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
+ }
+ lmt_status = roc_lmt_submit_ldeor(io_addr);
+ if (lmt_status == 0)
+ goto again1;
+ j += 1;
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t pkts, uint64_t *cmd, const uint16_t flags)
@@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
sendext23_w0 = vld1q_u64(sx_w0 + 2);
}
- if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
+ !(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
xmask01 = vdupq_n_u64(0);
xmask23 = xmask01;
@@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
* cnxk_nix_prefree_seg are written before LMTST.
*/
rte_io_wmb();
- } else {
+ } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
}
- if (flags & NIX_TX_NEED_EXT_HDR) {
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ uint64_t seg_list[NIX_DESCS_PER_LOOP]
+ [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+ uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
+
+ /* Build mseg list for each packet individually. */
+ for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
+ segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
+ seg_list[j], &cmd0[j],
+ &cmd1[j], flags);
+ segdw[4] = 8;
+
+ /* Commit all changes to mbuf before LMTST. */
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+ rte_io_wmb();
+
+ cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
+ segdw, seg_list,
+ lmt_addr, io_addr,
+ flags);
+ } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* With ext header in the command we can no longer send
* all 4 packets together since LMTLINE is 128bytes.
* Split and Tx twice.
@@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
- if (unlikely(pkts_left))
- pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
- flags);
+ if (unlikely(pkts_left)) {
+ if (flags & NIX_TX_MULTI_SEG_F)
+ pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
+ pkts_left, cmd, flags);
+ else
+ pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
+ cmd, flags);
+ }
return pkts;
}
@@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
new file mode 100644
index 000000000..0256efd45
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_tx.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
+ void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
+ { \
+ uint64_t cmd[sz]; \
+ \
+ /* For TSO inner checksum is a must */ \
+ if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
+ !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
+ return 0; \
+ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
+ (flags) | \
+ NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index aa8c7253f..361f7ce84 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
- 'cn9k_tx_vec.c')
+ 'cn9k_tx_vec.c',
+ 'cn9k_tx_vec_mseg.c')
# CN10K
sources += files('cn10k_ethdev.c',
'cn10k_rte_flow.c',
@@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
- 'cn10k_tx_vec.c')
+ 'cn10k_tx_vec.c',
+ 'cn10k_tx_vec_mseg.c')
deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']
deps += ['common_cnxk', 'mempool_cnxk']
--
2.17.1
* Re: [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx vector routine
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx " pbhagavatula
` (4 preceding siblings ...)
2021-06-29 7:44 ` [dpdk-dev] [PATCH v5 6/6] net/cnxk: add multi seg Tx vector routine pbhagavatula
@ 2021-06-29 16:20 ` Jerin Jacob
5 siblings, 0 replies; 93+ messages in thread
From: Jerin Jacob @ 2021-06-29 16:20 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, dpdk-dev
On Tue, Jun 29, 2021 at 1:14 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add multi-segment Rx vector routine; form the primary mbufs using the
> vector path and switch to the scalar path when extracting segments.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Series applied to dpdk-next-net-mrvl/for-dpdk-main. Thanks.
> ---
> v5 Changes:
> - Fix incorrect mbuf assignment.
> v4 Changes:
> - Split patches for easier merge.
> - Rebase on dpdk-next-net-mrvl.
> v3 Changes:
> - Spell check.
>
> drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
> drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
> drivers/net/cnxk/meson.build | 2 ++
> 7 files changed, 157 insertions(+), 44 deletions(-)
> create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
> create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
> index 5c956c06b..3a9fd7130 100644
> --- a/drivers/net/cnxk/cn10k_rx.c
> +++ b/drivers/net/cnxk/cn10k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 1cc37cbaa..5926ff7f4 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
> @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enabled, build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..04d1e46c8
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
> index 0acedd0a1..d293d4eac 100644
> --- a/drivers/net/cnxk/cn9k_rx.c
> +++ b/drivers/net/cnxk/cn9k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
> index 10ef5c690..5ae9e8195 100644
> --- a/drivers/net/cnxk/cn9k_rx.h
> +++ b/drivers/net/cnxk/cn9k_rx.h
> @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) +
> @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enabled, build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..e46d8a474
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | \
> + NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index 2071d0dcb..aa8c7253f 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
> 'cn9k_rx.c',
> 'cn9k_rx_mseg.c',
> 'cn9k_rx_vec.c',
> + 'cn9k_rx_vec_mseg.c',
> 'cn9k_tx.c',
> 'cn9k_tx_mseg.c',
> 'cn9k_tx_vec.c')
> @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
> 'cn10k_rx.c',
> 'cn10k_rx_mseg.c',
> 'cn10k_rx_vec.c',
> + 'cn10k_rx_vec_mseg.c',
> 'cn10k_tx.c',
> 'cn10k_tx_mseg.c',
> 'cn10k_tx_vec.c')
> --
> 2.17.1
>
* [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support
2021-06-20 20:28 ` [dpdk-dev] [PATCH v3 01/13] net/cnxk: add multi seg Rx vector routine pbhagavatula
` (13 preceding siblings ...)
2021-06-28 19:41 ` [dpdk-dev] [PATCH v4 1/6] " pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (6 more replies)
14 siblings, 7 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella,
Neil Horman
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit within a 64B cacheline.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 28 ++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 540 insertions(+), 47 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev, exposed through the
+ event eth Rx adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when the same mempool is used across the ethernet
+ devices connected to the event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device causes back pressure to be asserted only on the first
+ethernet device.
+Back pressure is therefore disabled automatically when the same mempool is
+shared by all the ethernet devices connected to the event device; applications
+can override this with the `force_rx_bp=1` device argument.
+Using a unique mempool per ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
+* **Added support for Marvell CN10K and CN9K event Rx adapter.**
+
+ * Added Rx adapter support for event/cnxk when the ethernet device requested is
+ net/cnxk.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb6902795..76613fe84 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f..f17eba416 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but is now disabled, skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+			limit > 128 ? 256 : limit); /* 95% of size */
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
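
A quick worked example of the threshold programmed above: the reference count
is the aura limit itself for pools of 128 buffers or fewer and a fixed 256
otherwise, so a pool with limit 4096 gets req->aura.bp = NIX_RQ_AURA_THRESH(256)
= (256 * 95) / 100 = 243, while a pool with limit 64 gets NIX_RQ_AURA_THRESH(64)
= (64 * 95) / 100 = 60 (integer division, matching the "95% of size" comment).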
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df44..9dc0c88a6 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8a5c839e5..cb1ce4b6f 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -29,6 +29,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76..2060c8fe8 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c6..5dbae275b 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd..c7250bf9e 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417ea..072800c24 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb..f5a440146 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a7..cfd7fb971 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62..b65d725f5 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c1..24bfd985e 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp,
+ RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, i, port, &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+			rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i);
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a..eda562f5b 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v4 2/7] event/cnxk: add Rx adapter fastpath ops
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (5 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
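
For context on where the new dequeue variants come into play: they are
installed once an application binds ethdev Rx queues to the event device
through the generic Rx adapter API, as in the sketch below. The adapter id,
event queue id and scheduling type are illustrative assumptions, not values
taken from this series; the calls are the standard rte_event_eth_rx_adapter
APIs.

    #include <string.h>
    #include <rte_eventdev.h>
    #include <rte_event_eth_rx_adapter.h>

    /* Assumes the event device and the ethernet port are already
     * configured and started.
     */
    static int
    bind_port_to_eventdev(uint8_t evdev_id, uint16_t eth_port)
    {
            struct rte_event_eth_rx_adapter_queue_conf qconf;
            struct rte_event_port_conf port_conf;
            int rc;

            rc = rte_event_port_default_conf_get(evdev_id, 0, &port_conf);
            if (rc)
                    return rc;

            rc = rte_event_eth_rx_adapter_create(0, evdev_id, &port_conf);
            if (rc)
                    return rc;

            memset(&qconf, 0, sizeof(qconf));
            qconf.ev.queue_id = 0;                       /* target event queue */
            qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC; /* per-flow ordering */
            qconf.ev.event_type = RTE_EVENT_TYPE_ETHDEV;

            /* rx_queue_id of -1 adds every Rx queue of the port; with the
             * internal port capability the PMD delivers packets to SSO
             * directly and the dequeue functions added below return them
             * as events.
             */
            rc = rte_event_eth_rx_adapter_queue_add(0, eth_port, -1, &qconf);
            if (rc)
                    return rc;

            return rte_event_eth_rx_adapter_start(0);
    }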
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe8..ba7d95fff 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
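
To illustrate the dispatch tables above: the six dimensions are indexed, left
to right, by the VLAN strip, timestamp, mark update, checksum, ptype and RSS
Rx offload flags. With, say, only checksum and RSS offloads enabled and
single-segment mbufs, the driver installs sso_hws_deq[0][0][0][1][0][1], i.e.
the cn10k_sso_hws_deq_ variant that NIX_RX_FASTPATH_MODES generates for that
flag combination; the multi-segment and timeout-dequeue branches pick the
corresponding _seg/_tmo tables in the same way.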
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275b..c71aa3732 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e..b724083ca 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+			/* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 000000000..36ec454cc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 000000000..29ecc551c
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 000000000..c8524a27b
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c24..e386cb784 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98d..538bc4b0b 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a440146..c01c00e1d 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 000000000..51ccaf4ec
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 000000000..4e2801459
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 000000000..9713d1ef0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 000000000..709fa2d9e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 000000000..d50e1cf83
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 000000000..a0508fdf0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f5..9d5d2d033 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b..c5c1c0ee8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
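For readers following the R()/NIX_RX_FASTPATH_MODES template pattern used throughout the patch above, here is a standalone reduction of the technique (one flag column instead of six, and a stand-in worker function). It is illustrative only, not driver code: the same list first stamps out one specialized function per flag combination, then fills the dispatch table indexed with !!(offload & FLAG).

#include <stdint.h>
#include <stdio.h>

#define FLAG_CSUM (1U << 0)

static uint16_t
generic_deq(uint32_t flags) /* stand-in for cn9k_sso_hws_get_work() */
{
	return (flags & FLAG_CSUM) ? 2 : 1;
}

/* Each entry expands R() once; the real driver's list has six columns. */
#define DEQ_MODES                                                      \
	R(no_offload, 0, 0)                                            \
	R(csum, 1, FLAG_CSUM)

#define R(name, f0, flags)                                             \
	static uint16_t deq_##name(void) { return generic_deq(flags); }
DEQ_MODES
#undef R

int
main(void)
{
	/* The same list fills a table indexed by !!(offloads & FLAG). */
	uint16_t (*const deq[2])(void) = {
#define R(name, f0, flags) [f0] = deq_##name,
		DEQ_MODES
#undef R
	};
	uint32_t offloads = FLAG_CSUM;

	printf("%u\n", deq[!!(offloads & FLAG_CSUM)]());
	return 0;
}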
* [dpdk-dev] [PATCH v4 3/7] event/cnxk: add Tx adapter support
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (4 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 4 +-
doc/guides/rel_notes/release_21_08.rst | 6 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 21 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++
6 files changed, 339 insertions(+), 6 deletions(-)
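For context, a minimal application-side sketch of the generic eventdev Tx adapter control path that this patch plugs into is given below. It is not part of the patch; dev_id, eth_port and port_conf are placeholder identifiers and error handling is trimmed.

#include <rte_event_eth_tx_adapter.h>

static int
app_setup_tx_adapter(uint8_t dev_id, uint16_t eth_port,
		     struct rte_event_port_conf *port_conf)
{
	uint8_t id = 0;
	int rc;

	/* event/cnxk advertises RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT,
	 * so the adapter needs no service core and mbufs are submitted
	 * straight from worker cores.
	 */
	rc = rte_event_eth_tx_adapter_create(id, dev_id, port_conf);
	if (rc)
		return rc;

	/* -1 binds every Tx queue of the ethdev to the adapter. */
	rc = rte_event_eth_tx_adapter_queue_add(id, eth_port, -1);
	if (rc)
		return rc;

	return rte_event_eth_tx_adapter_start(id);
}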
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c127..6fdccc2ab 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
- HW managed packets enqueued from ethdev to eventdev exposed through event eth
RX adapter.
- N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017..80ff93269 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
- * Added Rx adapter support for event/cnxk when the ethernet device requested is
- net/cnxk.
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff..8a9b04a3d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784..bdc563223 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d033..458fdc8d9 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -86,9 +87,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +119,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +147,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 24bfd985e..548d7b81c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -5,6 +5,8 @@
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
+#define CNXK_SSO_SQB_LIMIT (0x180)
+
void
cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
uint32_t event_type)
@@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ uint16_t sqb_limit;
+
+ sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs);
+ return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit);
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ txq = eth_dev->data->tx_queues[i];
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i, txq);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+ }
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i,
+ NULL);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
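The tx_adptr_data handling above hinges on one idiom: a flat uint64_t buffer of (max_port_id + 1) * RTE_MAX_QUEUES_PER_PORT entries reinterpreted as a two-dimensional table through a pointer-to-array cast. A small self-contained illustration of that cast follows; it is not driver code, and QUEUES_PER_PORT and the fake txq handle are placeholders.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define QUEUES_PER_PORT 4 /* stands in for RTE_MAX_QUEUES_PER_PORT */

int
main(void)
{
	uint16_t max_port_id = 2, port = 1, queue = 3;
	uint64_t *flat = calloc((size_t)(max_port_id + 1) * QUEUES_PER_PORT,
				sizeof(uint64_t));
	void *txq = &flat[0]; /* any pointer works as a fake txq handle */

	/* Store: same shape as cnxk_sso_updt_tx_queue_data(). */
	((uint64_t(*)[QUEUES_PER_PORT])flat)[port][queue] =
		(uint64_t)(uintptr_t)txq;

	/* Fast-path lookup: view the same buffer as txq_data[port][queue]. */
	const uint64_t(*txq_data)[QUEUES_PER_PORT] =
		(const uint64_t(*)[QUEUES_PER_PORT])flat;
	printf("0x%" PRIx64 "\n", txq_data[port][queue]);

	free(flat);
	return 0;
}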
* [dpdk-dev] [PATCH v4 4/7] event/cnxk: add Tx adapter fastpath ops
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (3 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/meson.build | 6 ++
11 files changed, 417 insertions(+)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
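As a companion to the fast-path handlers added here, a hedged sketch of the worker-side call that ends up in the selected txa_enqueue implementation is shown below. dev_id, event_port and txq are placeholders, and the busy-wait retry is only illustrative.

#include <rte_event_eth_tx_adapter.h>

static inline void
app_worker_tx(uint8_t dev_id, uint8_t event_port, struct rte_mbuf *m,
	      uint16_t txq)
{
	struct rte_event ev = {0};

	/* Tell the PMD which ethdev Tx queue to use for this mbuf. */
	rte_event_eth_tx_adapter_txq_set(m, txq);

	ev.mbuf = m;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;

	/* With the INTERNAL_PORT capability this call lands in the
	 * flag-specialized cn9k/cn10k txa_enqueue handler chosen above.
	 */
	while (rte_event_eth_tx_adapter_enqueue(dev_id, event_port,
						&ev, 1, 0) != 1)
		;
}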
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 000000000..f9968ac0d
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..a24fc42e5
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index bdc563223..af97020f2 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1d..5aa053c58 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Lets commit any changes in the packet here in case when
+ * fast free is set as no further changes will be made to mbuf.
+ * In case of fast free is not set, both cn9k_nix_prepare_mseg()
+ * and cn9k_nix_xmit_prepare() has a barrier after refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 000000000..92e2981f0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 000000000..dfb574cf9
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 000000000..3df649c0c
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..0efe29113
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8..13e0634e8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v4 5/7] event/cnxk: add Rx adapter vector support
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
` (2 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter: add control
path APIs to get vector limits and to configure event vectorization on
a given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 2 +
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
5 files changed, 135 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab..0297cd3d5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
capability while maintaining receive packet order.
- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev, configurable per Rx queue in the Rx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c..e85fa4785 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 458fdc8d9..3783e0c95 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -96,6 +96,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 548d7b81c..c4c4f5a7f 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead0390..2528b3cda 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
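From the application side, the driver callbacks added in this patch are reached through the Rx adapter event vector API. A minimal sketch follows, assuming the 21.08-era experimental rte_event_eth_rx_adapter calls: the two public function names are assumptions, while the limits and config structures are exactly the ones consumed by cn10k_sso_rx_adapter_vector_limits() and cn10k_sso_rx_adapter_vector_config() above. Error handling is trimmed for brevity.

/*
 * Application-side sketch; the rte_event_eth_rx_adapter_* function names are
 * assumptions from the 21.08-era experimental API.
 */
#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_mempool.h>

static int
enable_rx_vectorization(uint8_t adapter_id, uint8_t evdev_id,
                        uint16_t eth_port, int32_t rx_queue,
                        struct rte_mempool *vec_pool)
{
        struct rte_event_eth_rx_adapter_vector_limits limits;
        struct rte_event_eth_rx_adapter_event_vector_config cfg;
        int rc;

        /* Query the vector size/timeout window supported by the PMD. */
        rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
                                                        &limits);
        if (rc)
                return rc;

        memset(&cfg, 0, sizeof(cfg));
        cfg.vector_sz = limits.max_sz; /* power of two, as log2_sz is set */
        cfg.vector_timeout_ns = limits.min_timeout_ns;
        cfg.vector_mp = vec_pool;      /* mempool of event vectors */

        /* On cn10k this ends up in the VWQE fields of the RQ context via
         * cnxk_sso_rx_adapter_vwqe_enable() shown above.
         */
        return rte_event_eth_rx_adapter_queue_event_vector_config(
                adapter_id, eth_port, rx_queue, &cfg);
}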
* [dpdk-dev] [PATCH v4 6/7] event/cnxk: add Rx event vector fastpath
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 7/7] event/cnxk: add Tx " pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add an Rx event vector fastpath that converts the HW-defined metadata into
rte_mbuf and rte_event_vector objects.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/rel_notes/release_21_08.rst | 1 +
drivers/event/cnxk/cn10k_worker.h | 56 +++++++
drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
5 files changed, 179 insertions(+), 85 deletions(-)
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269..11ccc9bcb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
+ * Add support for event vectorization for Rx adapter.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c8500..7a48a6b17 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e..a506a867c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f..166735ad5 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c8..1f44ddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
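On the consuming side, the fastpath added here delivers events of type RTE_EVENT_TYPE_ETHDEV_VECTOR whose payload is a struct rte_event_vector filled by cn10k_process_vwqe(). A minimal worker sketch is shown below; the process_pkt() stub is a hypothetical placeholder for application logic and simply frees the packet.

/*
 * Worker-side sketch of consuming the vector events produced above.
 */
#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
process_pkt(struct rte_mbuf *m)
{
        rte_pktmbuf_free(m); /* placeholder for application processing */
}

static void
worker_poll(uint8_t evdev_id, uint8_t ev_port)
{
        struct rte_event ev;
        uint16_t i;

        if (!rte_event_dequeue_burst(evdev_id, ev_port, &ev, 1, 0))
                return;

        if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
                struct rte_event_vector *vec = ev.vec;

                /* mbufs[] was filled by cn10k_process_vwqe() /
                 * cn10k_nix_recv_pkts_vector() in the fastpath above.
                 */
                for (i = 0; i < vec->nb_elem; i++)
                        process_pkt(vec->mbufs[i]);

                /* The vector object came from the mempool configured on the
                 * Rx queue; return it once its mbufs are consumed.
                 */
                rte_mempool_put(rte_mempool_from_obj(vec), vec);
        } else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
                process_pkt(ev.mbuf);
        }
}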
* [dpdk-dev] [PATCH v4 7/7] event/cnxk: add Tx event vector fastpath
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-06-28 19:52 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-28 19:52 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add a Tx event vector fastpath and integrate the event vector Tx routine
into the Tx burst path.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 1 +
doc/guides/rel_notes/release_21_08.rst | 2 +-
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
12 files changed, 167 insertions(+), 53 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
- Full Rx/Tx offload support defined through ethdev queue configuration.
- HW managed event vectorization on CN10K for packets enqueued from ethdev to
eventdev, configurable per Rx queue in the Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
- * Add support for event vectorization for Rx adapter.
+ * Add support for event vectorization for Rx/Tx adapter.
Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8..316c6ccd5 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785..6f37c5bd2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17..9cc099206 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 5aa053c58..ef1e83741 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
txq->io_addr, segdw);
@@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
txq->io_addr, flags);
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae16..945132b74 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
-#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
- ;
-#endif
-}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59..0e1276c60 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 532b53b31..d2a24120e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1972,6 +1986,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2028,10 +2045,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2040,13 +2058,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f675472..4ea4c8a4e 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e373750..a0350496a 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dba..7f98f79b9 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
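From an application's point of view, the Tx side added here is driven by enqueuing an event that carries the RTE_EVENT_TYPE_VECTOR bit. The sketch below assumes every mbuf in the vector targets a single ethdev (port, queue) pair, so vec->attr_valid is set and cn10k_sso_hws_event_tx() takes the single-txq branch rather than cn10k_sso_vwqe_split_tx(); the helper name and its parameters are illustrative, and the vector is assumed to already hold nb_elem mbufs.

/*
 * Sketch of handing one vector of mbufs to the Tx adapter fastpath above.
 */
#include <string.h>

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

static uint16_t
tx_vector_burst(uint8_t evdev_id, uint8_t ev_port,
                struct rte_event_vector *vec, uint16_t eth_port, uint16_t txq)
{
        struct rte_event ev;

        memset(&ev, 0, sizeof(ev));

        /* Every mbuf in this vector goes to the same ethdev port/queue. */
        vec->attr_valid = 1;
        vec->port = eth_port;
        vec->queue = txq;

        /* The driver keys on the RTE_EVENT_TYPE_VECTOR bit of the event
         * type before treating ev.vec as an event vector.
         */
        ev.event_type = RTE_EVENT_TYPE_ETHDEV_VECTOR;
        ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
        ev.vec = vec;

        return rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, &ev, 1, 0);
}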
* [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (5 preceding siblings ...)
2021-06-28 19:52 ` [dpdk-dev] [PATCH v4 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (6 more replies)
6 siblings, 7 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella,
Neil Horman
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for the event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit within a 64B cache line.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v5 Changes:
- Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 28 ++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 ++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 +++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 ++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 540 insertions(+), 47 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+ RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when the same mempool is used across the ethernet
+ devices connected to the event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device causes back pressure to be asserted only on the first
+ethernet device.
+Back pressure is therefore automatically disabled when the same mempool is
+used across all the ethernet devices connected to the event device; to
+override this, applications can use the `force_rx_bp=1` device argument.
+Using a unique mempool per ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
+* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+
+ * Added Rx adapter support for event/cnxk when the ethernet device requested is
+ net/cnxk.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb6902795..76613fe84 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f..f17eba416 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df44..9dc0c88a6 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8a5c839e5..cb1ce4b6f 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -29,6 +29,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76..2060c8fe8 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c6..5dbae275b 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd..c7250bf9e 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417ea..072800c24 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb..f5a440146 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a7..cfd7fb971 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62..b65d725f5 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c1..24bfd985e 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,152 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp,
+ RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, i, port, &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ rxq_sp = eth_dev->data->rx_queues[i];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, i);
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+ } else {
+ rxq_sp = eth_dev->data->rx_queues[rx_queue_id];
+ rxq_sp = rxq_sp - 1;
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a..eda562f5b 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
* [dpdk-dev] [PATCH v5 2/7] event/cnxk: add Rx adapter fastpath ops
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (5 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
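For reference, a minimal application-side sketch (not part of this patch; the
adapter id, event queue mapping and scheduling type are illustrative) of wiring
an ethdev port's Rx queues into the event device through the standard Rx
adapter API that these driver ops back:

#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>

/* Illustrative only: adapter id 0, all Rx queues of eth_port. */
static int
setup_rx_adapter(uint8_t evdev_id, uint16_t eth_port)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf = {0};
	struct rte_event_port_conf pconf;
	int rc;

	rc = rte_event_port_default_conf_get(evdev_id, 0, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_create(0, evdev_id, &pconf);
	if (rc)
		return rc;

	/* Deliver packets as ATOMIC events on event queue 0. */
	qconf.ev.queue_id = 0;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;

	/* rx_queue_id of -1 adds every Rx queue of the port. */
	rc = rte_event_eth_rx_adapter_queue_add(0, eth_port, -1, &qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(0);
}

Since this driver advertises the internal-port capability, no service core is
needed; received packets are delivered through the dequeue fastpath functions
added below.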
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
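The hunks below all follow the same template: NIX_RX_FASTPATH_MODES expands one
R() entry per combination of Rx offload flags, and the matching handler is then
picked by indexing a multi-dimensional array with the individual offload bits.
A condensed, illustrative view of the pattern (names taken from the patch, not
additional code in the series):

#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
	[f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,

const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
	NIX_RX_FASTPATH_MODES
};
#undef R

/* One dimension per offload flag, each reduced to 0 or 1. */
event_dev->dequeue =
	sso_hws_deq[!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
		   [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
		   [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
		   [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
		   [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
		   [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];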
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe8..ba7d95fff 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275b..c71aa3732 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e..b724083ca 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 000000000..36ec454cc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 000000000..29ecc551c
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 000000000..c8524a27b
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c24..e386cb784 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98d..538bc4b0b 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a440146..c01c00e1d 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 000000000..51ccaf4ec
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 000000000..4e2801459
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 000000000..9713d1ef0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 000000000..709fa2d9e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 000000000..d50e1cf83
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 000000000..a0508fdf0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f5..9d5d2d033 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b..c5c1c0ee8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 3/7] event/cnxk: add Tx adapter support
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (4 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 4 +-
doc/guides/rel_notes/release_21_08.rst | 6 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 117 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 21 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 ++++++++++++++++++++
6 files changed, 339 insertions(+), 6 deletions(-)
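
For context, the sketch below shows a minimal application-side use of the adapter
added here, using only the generic rte_event_eth_tx_adapter API; the adapter, port
and queue ids, and the assumption that the eventdev and ethdev are already
configured, are illustrative and not part of this patch. With the
RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability advertised by this driver,
the event is transmitted directly from the worker's own event port:

#include <errno.h>

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

/* Sketch only: 'evdev', 'eth_port', the worker's event port 'ev_port' and
 * 'port_conf' are assumed to be configured and started elsewhere. */
static int
setup_and_use_tx_adapter(uint8_t evdev, uint16_t eth_port, uint8_t ev_port,
			 struct rte_event_port_conf *port_conf)
{
	const uint8_t txa_id = 0;
	struct rte_event ev;
	uint32_t caps = 0;
	int rc;

	rc = rte_event_eth_tx_adapter_caps_get(evdev, eth_port, &caps);
	if (rc || !(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP; /* sketch only covers the internal-port case */

	rc = rte_event_eth_tx_adapter_create(txa_id, evdev, port_conf);
	if (rc)
		return rc;
	/* queue id -1 binds every Tx queue of the port to the adapter. */
	rc = rte_event_eth_tx_adapter_queue_add(txa_id, eth_port, -1);
	if (rc)
		return rc;
	rc = rte_event_eth_tx_adapter_start(txa_id);
	if (rc)
		return rc;

	/* Worker loop body: pick a Tx queue for the mbuf (assuming the event
	 * carries an mbuf) and hand the event to the adapter enqueue path. */
	if (rte_event_dequeue_burst(evdev, ev_port, &ev, 1, 0)) {
		rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
		rte_event_eth_tx_adapter_enqueue(evdev, ev_port, &ev, 1, 0);
	}
	return 0;
}
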
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c127..6fdccc2ab 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
- HW managed packets enqueued from ethdev to eventdev exposed through event eth
RX adapter.
- N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017..80ff93269 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
- * Added Rx adapter support for event/cnxk when the ethernet device requested is
- net/cnxk.
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff..8a9b04a3d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784..bdc563223 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,55 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +976,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d033..458fdc8d9 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -86,9 +87,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +119,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +147,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +169,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +278,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 24bfd985e..548d7b81c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -5,6 +5,8 @@
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
+#define CNXK_SSO_SQB_LIMIT (0x180)
+
void
cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
uint32_t event_type)
@@ -241,3 +243,107 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ uint16_t sqb_limit;
+
+ sqb_limit = RTE_MIN(nb_sqb_bufs, sq->nb_sqb_bufs);
+ return roc_npa_aura_limit_modify(sq->aura_handle, sqb_limit);
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ txq = eth_dev->data->tx_queues[i];
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i, txq);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+ }
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+ sq = &cnxk_eth_dev->sqs[i];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, i,
+ NULL);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
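
The tx_adptr_data table maintained above is a single flat uint64_t array viewed
as [port][queue]; it grows whenever a higher ethdev port id shows up and is
copied behind each HWS in cn9k/cn10k_sso_updt_tx_adptr_data() so the fast path
can resolve the destination txq with a single table lookup. A self-contained
sketch of that layout (plain realloc standing in for rte_realloc_socket, DEMO_*
constants made up):

#include <stdint.h>
#include <stdlib.h>

#define DEMO_MAX_QUEUES_PER_PORT 64 /* stand-in for RTE_MAX_QUEUES_PER_PORT */

struct demo_txa {
	uint64_t *txq_data;
	uint16_t max_port_id;
};

/* Control path: mirrors cnxk_sso_updt_tx_queue_data() above. */
static int
demo_updt_tx_queue_data(struct demo_txa *a, uint16_t port, uint16_t queue,
			void *txq)
{
	if (a->txq_data == NULL || port > a->max_port_id) {
		uint16_t max = port > a->max_port_id ? port : a->max_port_id;
		uint64_t *p = realloc(a->txq_data,
				      sizeof(uint64_t) * (max + 1) *
					      DEMO_MAX_QUEUES_PER_PORT);

		if (p == NULL)
			return -1;
		a->txq_data = p;
		a->max_port_id = max;
	}
	/* Same 2-D view over the flat array as the driver code above. */
	((uint64_t(*)[DEMO_MAX_QUEUES_PER_PORT])a->txq_data)[port][queue] =
		(uint64_t)(uintptr_t)txq;
	return 0;
}

/* Fast path: mirrors cn9k/cn10k_sso_hws_xtract_meta() in the next patch. */
static void *
demo_xtract_txq(const uint64_t txq_data[][DEMO_MAX_QUEUES_PER_PORT],
		uint16_t port, uint16_t queue)
{
	return (void *)(uintptr_t)txq_data[port][queue];
}

int main(void)
{
	struct demo_txa a = {0};
	int dummy_txq;

	demo_updt_tx_queue_data(&a, 3, 5, &dummy_txq);
	return demo_xtract_txq((const uint64_t(*)[DEMO_MAX_QUEUES_PER_PORT])
				       a.txq_data, 3, 5) == &dummy_txq ? 0 : 1;
}
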
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 4/7] event/cnxk: add Tx adapter fastpath ops
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (3 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 ++++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 +++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 87 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/meson.build | 6 ++
11 files changed, 417 insertions(+)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 000000000..f9968ac0d
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..a24fc42e5
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index bdc563223..af97020f2 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1d..5aa053c58 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,90 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Commit any changes in the packet here when fast free is set,
+ * as no further changes will be made to the mbuf. When fast free
+ * is not set, both cn9k_nix_prepare_mseg() and
+ * cn9k_nix_xmit_prepare() have a barrier after the refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 000000000..92e2981f0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 000000000..dfb574cf9
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 000000000..3df649c0c
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..0efe29113
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8..13e0634e8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 5/7] event/cnxk: add Rx adapter vector support
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
` (2 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter: add control
path APIs to get vector limits and to configure event vectorization
on a given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 2 +
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
5 files changed, 135 insertions(+), 2 deletions(-)
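
For reference, the sketch below shows how an application would drive the two new
driver ops through the generic (at the time experimental) rte_event_eth_rx_adapter
vector API introduced alongside this series; the pool sizing, ids and the
assumption that the Rx queue was already added to the adapter with the
EVENT_VECTOR queue flag are the sketch's own, not taken from this patch:

#include <errno.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_mempool.h>

/* Sketch: enable event vectorization on Rx queue 'rxq' of 'eth_port',
 * assuming Rx adapter 'rxa_id' on eventdev 'evdev' already has the queue
 * added with RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR set. */
static int
enable_rx_vectorization(uint8_t rxa_id, uint8_t evdev, uint16_t eth_port,
			int32_t rxq)
{
	struct rte_event_eth_rx_adapter_event_vector_config vec_conf = {0};
	struct rte_event_eth_rx_adapter_vector_limits limits;
	struct rte_mempool *vmp;
	int rc;

	rc = rte_event_eth_rx_adapter_vector_limits_get(evdev, eth_port,
							&limits);
	if (rc)
		return rc;

	/* Each pool element can hold up to 'max_sz' mbuf pointers; max_sz is
	 * a power of two here, which satisfies limits.log2_sz. Pool size and
	 * socket id (0) are arbitrary for this sketch. */
	vmp = rte_event_vector_pool_create("demo_vec_pool", 16384, 0,
					   limits.max_sz, 0);
	if (vmp == NULL)
		return -ENOMEM;

	vec_conf.vector_mp = vmp;
	vec_conf.vector_sz = limits.max_sz;
	vec_conf.vector_timeout_ns = limits.min_timeout_ns;
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		rxa_id, eth_port, rxq, &vec_conf);
}
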
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab..0297cd3d5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
capability while maintaining receive packet order.
- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev, configurable per Rx queue in the Rx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c..e85fa4785 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
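
The vwqe enable helper above also fixes how vector events identify themselves to
the SSO fast path: the ethdev port id occupies tag bits [27:20] (written as two
nibbles) and the event type lands in bits [31:28], which the get_work path in the
next patch decodes back into RTE_EVENT_TYPE_ETHDEV_VECTOR plus a port number. The
round-trip sketch below is inferred from the encode shown here; the DEMO_ event
type value is illustrative, not the real RTE_EVENT_TYPE_ETHDEV_VECTOR constant:

#include <assert.h>
#include <stdint.h>

#define DEMO_EVENT_TYPE_ETHDEV_VECTOR 0xB /* illustrative 4-bit type value */

/* Pack an 8-bit port id and the event type the way
 * cnxk_sso_rx_adapter_vwqe_enable() builds rq->tag_mask. */
static uint32_t
demo_vwqe_tag(uint16_t port_id)
{
	uint32_t tag = (uint32_t)(port_id & 0xF) << 20;

	tag |= (uint32_t)(((port_id >> 4) & 0xF) |
			  (DEMO_EVENT_TYPE_ETHDEV_VECTOR << 4))
	       << 24;
	return tag;
}

int main(void)
{
	uint16_t port_id = 0x2A; /* must fit in 8 bits for the round trip */
	uint32_t tag = demo_vwqe_tag(port_id);

	/* Decode as the get_work fast path does: bits [31:28] carry the
	 * event type, bits [27:20] the originating ethdev port id. */
	assert(((tag >> 28) & 0xF) == DEMO_EVENT_TYPE_ETHDEV_VECTOR);
	assert(((tag >> 20) & 0xFF) == port_id);
	return 0;
}
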
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 458fdc8d9..3783e0c95 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -96,6 +96,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 548d7b81c..c4c4f5a7f 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -40,6 +40,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead0390..2528b3cda 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v5 6/7] event/cnxk: add Rx event vector fastpath
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 7/7] event/cnxk: add Tx " pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Rx event vector fastpath to convert HW defined metadata into
rte_mbuf and rte_event_vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/rel_notes/release_21_08.rst | 1 +
drivers/event/cnxk/cn10k_worker.h | 56 +++++++
drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
5 files changed, 179 insertions(+), 85 deletions(-)
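
On the receive side of this fast path, the application sees ordinary events whose
event_type is RTE_EVENT_TYPE_ETHDEV_VECTOR and whose vec pointer refers to the
rte_event_vector filled in by cn10k_process_vwqe() below. A minimal consumer
sketch follows; the burst size and the free-the-vector step are the sketch's
assumptions about the application, not part of this driver:

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Sketch: drain one burst from an event port and handle mbuf vectors. */
static void
demo_drain_port(uint8_t evdev, uint8_t ev_port)
{
	struct rte_event evs[32];
	uint16_t i, j, nb;

	nb = rte_event_dequeue_burst(evdev, ev_port, evs, 32, 0);
	for (i = 0; i < nb; i++) {
		if (evs[i].event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			struct rte_event_vector *vec = evs[i].vec;

			for (j = 0; j < vec->nb_elem; j++)
				rte_pktmbuf_free(vec->mbufs[j]);
			/* Return the vector to the mempool it was drawn from. */
			rte_mempool_put(rte_mempool_from_obj(vec), vec);
		} else if (evs[i].event_type == RTE_EVENT_TYPE_ETHDEV) {
			rte_pktmbuf_free(evs[i].mbuf);
		}
	}
}
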
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269..11ccc9bcb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
+ * Added support for event vectorization for Rx adapter.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c8500..7a48a6b17 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e..a506a867c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f..166735ad5 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c8..1f44ddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
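For reference, the repacked vector WQE header written back above is what the
Tx side in the next patch reads as 'meta'; a rough sketch of the assumed bit
layout, inferred from the shifts in cn10k_sso_hws_get_work() rather than from
any documented format:

#include <stdint.h>

/* Sketch only: decode the 64-bit header that fronts the rte_event_vector. */
static inline void
vwqe_hdr_decode(uint64_t hdr, uint16_t *nb_elem, uint16_t *port,
		uint16_t *queue, int *attr_valid)
{
	*nb_elem = hdr & 0xFFFF;              /* element count */
	*attr_valid = !!(hdr & (1ULL << 31)); /* port/queue fields valid */
	*port = (hdr >> 32) & 0xFFFF;         /* ethdev port id */
	*queue = (hdr >> 48) & 0xFFFF;        /* Rx queue id */
}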
* [dpdk-dev] [PATCH v5 7/7] event/cnxk: add Tx event vector fastpath
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-06-29 8:01 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-06-29 8:01 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Tx event vector fastpath and integrate the event vector Tx routine
into the Tx burst path.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
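For context, a minimal sketch of how an application might hand a vector of
mbufs to the Tx adapter once the RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR
capability added here is reported; the vector allocation, the
attr_valid/port/queue usage and the device ids are assumptions drawn from the
generic eventdev vector API, not code from this patch:

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

/* Illustrative only: enqueue one event vector destined for a single
 * ethdev port/queue through the Tx adapter enqueue path.
 */
static uint16_t
tx_vector_enqueue(uint8_t dev_id, uint8_t tx_port_id,
		  struct rte_event_vector *vec,
		  uint16_t eth_port, uint16_t eth_txq)
{
	struct rte_event ev = {0};

	/* All mbufs in the vector target the same port/queue, so set the
	 * vector attributes instead of relying on per-mbuf queue lookups.
	 */
	vec->attr_valid = 1;
	vec->port = eth_port;
	vec->queue = eth_txq;

	/* The cnxk Tx path above keys off the RTE_EVENT_TYPE_VECTOR bit. */
	ev.event_type = RTE_EVENT_TYPE_ETHDEV_VECTOR;
	ev.vec = vec;

	return rte_event_eth_tx_adapter_enqueue(dev_id, tx_port_id, &ev, 1, 0);
}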
---
doc/guides/eventdevs/cnxk.rst | 1 +
doc/guides/rel_notes/release_21_08.rst | 2 +-
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
12 files changed, 167 insertions(+), 53 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
- Full Rx/Tx offload support defined through ethdev queue configuration.
- HW managed event vectorization on CN10K for packets enqueued from ethdev to
eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
- * Add support for event vectorization for Rx adapter.
+ * Add support for event vectorization for Rx/Tx adapter.
Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8..316c6ccd5 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785..6f37c5bd2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17..9cc099206 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 5aa053c58..ef1e83741 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -458,7 +458,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
txq->io_addr, segdw);
@@ -469,7 +469,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
txq->io_addr, flags);
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae16..945132b74 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,27 +75,5 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
-#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
- ;
-#endif
-}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59..0e1276c60 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index eb148b8e7..f75cae07a 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f675472..4ea4c8a4e 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e373750..a0350496a 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dba..7f98f79b9 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (5 preceding siblings ...)
2021-06-29 8:01 ` [dpdk-dev] [PATCH v5 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (6 more replies)
6 siblings, 7 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella,
Neil Horman
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit within a 64B cacheline.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
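For context, a minimal sketch of the application-side setup this enables; the
adapter/device ids, the event port configuration and the scheduling choices
below are assumptions for illustration, not part of this patch:

#include <errno.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

/* Illustrative Rx adapter setup: map every Rx queue of eth_port to a single
 * event queue and start the adapter.
 */
static int
rx_adapter_setup(uint8_t adapter_id, uint8_t dev_id, uint16_t eth_port,
		 uint8_t ev_queue, struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t caps;
	int rc;

	rc = rte_event_eth_rx_adapter_caps_get(dev_id, eth_port, &caps);
	if (rc)
		return rc;
	/* cnxk reports INTERNAL_PORT, so no service core is required. */
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	rc = rte_event_eth_rx_adapter_create(adapter_id, dev_id, port_conf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;

	/* rx_queue_id of -1 adds all Rx queues of the ethdev. */
	rc = rte_event_eth_rx_adapter_queue_add(adapter_id, eth_port, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adapter_id);
}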
---
v6 Changes:
- More code cleanup.
- Fix incorrect SQB configuration and missing fc check.
v5 Changes:
- Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 28 ++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 522 insertions(+), 47 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+ RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when the same mempool is used across ethernet devices
+ connected to the event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device would cause back pressure to be asserted only on the first
+ethernet device.
+Back pressure is automatically disabled when the same mempool is used for all
+the ethernet devices connected to the event device; to override this,
+applications can use the `force_rx_bp=1` device argument.
+Using a unique mempool for each ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
+* **Added support for Marvell CN10K and CN9K event Rx adapter.**
+
+ * Added Rx adapter support for event/cnxk when the ethernet device requested is
+ net/cnxk.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb6902795..76613fe84 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f..f17eba416 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df44..9dc0c88a6 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8a5c839e5..cb1ce4b6f 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -29,6 +29,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76..2060c8fe8 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c6..5dbae275b 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd..c7250bf9e 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417ea..072800c24 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb..f5a440146 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a7..cfd7fb971 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62..b65d725f5 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c1..3b7ecb375 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev,
+ i, queue_conf);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
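
The cnxk_sso_rxq_enable() helper above encodes the originating ethdev port into the SSO tag so that the worker can later recognise ethdev events and recover the port via CNXK_SUB_EVENT_FROM_TAG(): the low 20 bits stay reserved for the RSS flow tag (flow_tag_width = 20), bits [23:20] and [27:24] carry the two port-id nibbles, and bits [31:28] carry RTE_EVENT_TYPE_ETHDEV. A minimal sketch of that composition (the helper name and the bit-layout comments are illustrative, not part of the patch):

    #include <stdint.h>
    #include <rte_eventdev.h>

    /* Sketch only: mirrors the tag_mask maths in cnxk_sso_rxq_enable(). */
    static uint32_t
    sso_rq_tag_for_port(uint8_t port_id)
    {
            uint32_t tag;

            tag = (uint32_t)(port_id & 0xF) << 20;           /* port[3:0]  -> tag[23:20] */
            tag |= (uint32_t)(((port_id >> 4) & 0xF) |       /* port[7:4]  -> tag[27:24] */
                              (RTE_EVENT_TYPE_ETHDEV << 4))  /* event type -> tag[31:28] */
                   << 24;
            return tag;                                      /* tag[19:0] left for the flow tag */
    }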
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a..eda562f5b 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
* [dpdk-dev] [PATCH v6 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (5 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe8..ba7d95fff 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
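
The tables above are indexed with one dimension per Rx offload flag, and !!(dev->rx_offloads & FLAG) collapses each flag to 0 or 1, so every combination of offloads resolves to a dequeue routine that was compiled with those flags as constants. A reduced two-flag sketch of the same selection pattern (function and table names here are illustrative, not from the patch):

    #include <stdint.h>

    typedef uint16_t (*deq_fn_t)(void *port);

    /* Sketch only: pick a specialised routine from a per-flag-bit table. */
    static deq_fn_t
    pick_deq_fn(uint64_t offloads, uint64_t rss_flag, uint64_t csum_flag,
                deq_fn_t table[2][2])
    {
            return table[!!(offloads & csum_flag)][!!(offloads & rss_flag)];
    }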
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275b..c71aa3732 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e..b724083ca 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
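
Both the inline assembly ("sub %[mbuf], %[wqp], #0x80") and the C fall-back above derive the mbuf pointer by subtracting sizeof(struct rte_mbuf) from the WQE address, relying on the mbuf header being laid out immediately before the WQE; 0x80 is the 128-byte (two cache line) size of struct rte_mbuf. Roughly equivalent C (the helper name is illustrative):

    #include <rte_mbuf.h>

    /* Sketch only: recover the mbuf that precedes a NIX work-queue entry. */
    static inline struct rte_mbuf *
    nix_wqe_to_mbuf(void *wqe)
    {
            return (struct rte_mbuf *)((char *)wqe - sizeof(struct rte_mbuf));
    }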
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 000000000..36ec454cc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
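
Each R() entry in NIX_RX_FASTPATH_MODES instantiates the template above with a fixed set of flag bits, so the compiler can strip the unused offload branches from every specialisation. For instance, a mode such as R(rss, 0, 0, 0, 0, 0, 1, NIX_RX_OFFLOAD_RSS_F) would expand to roughly the following (the mode name is used as an example; see the NIX_RX_FASTPATH_MODES list in the net/cnxk Rx headers for the actual entries):

    uint16_t __rte_hot
    cn10k_sso_hws_deq_rss(void *port, struct rte_event *ev, uint64_t timeout_ticks)
    {
            struct cn10k_sso_hws *ws = port;

            RTE_SET_USED(timeout_ticks);

            if (ws->swtag_req) {
                    ws->swtag_req = 0;
                    cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
                    return 1;
            }

            /* flags == NIX_RX_OFFLOAD_RSS_F is a compile-time constant here. */
            return cn10k_sso_hws_get_work(ws, ev, NIX_RX_OFFLOAD_RSS_F,
                                          ws->lookup_mem);
    }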
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 000000000..29ecc551c
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
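
The burst wrappers ignore nb_events and simply call the single-event dequeue, so an application polling this PMD receives at most one event per rte_event_dequeue_burst() call regardless of the array size. A usage sketch (dev_id, port_id and timeout_ticks are assumed to be set up by the application):

    struct rte_event evs[32];
    uint16_t n;

    /* n is 0 or 1 on this PMD, even though 32 slots are offered. */
    n = rte_event_dequeue_burst(dev_id, port_id, evs, RTE_DIM(evs),
                                timeout_ticks);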
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 000000000..c8524a27b
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c24..e386cb784 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98d..538bc4b0b 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a440146..c01c00e1d 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 000000000..51ccaf4ec
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 000000000..4e2801459
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 000000000..9713d1ef0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 000000000..709fa2d9e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
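
The dual-workslot variants alternate between the two hardware workslots on every call: cn9k_sso_hws_dual_get_work() returns the event from the current workslot while re-arming GET_WORK on its pair (the "str %[gw], [%[pong]]" / plt_write64(set_gw, ws_pair->getwrk_op) shown earlier), so the next work request overlaps with processing of the current event. The toggle reduces to:

    /* Sketch only: consume from ws_state[vws], pre-arm ws_state[!vws],
     * then flip vws so the roles swap on the next dequeue.
     */
    gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
                                    &dws->ws_state[!dws->vws], ev, flags,
                                    dws->lookup_mem, dws->tstamp);
    dws->vws = !dws->vws;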
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 000000000..d50e1cf83
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 000000000..a0508fdf0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f5..9d5d2d033 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b..c5c1c0ee8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v6 3/7] event/cnxk: add Tx adapter support
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-03 13:23 ` Nithin Dabilpuram
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (4 subsequent siblings)
6 siblings, 1 reply; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 4 +-
doc/guides/rel_notes/release_21_08.rst | 6 +-
drivers/common/cnxk/roc_nix.h | 1 +
drivers/common/cnxk/roc_nix_queue.c | 7 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 22 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++
8 files changed, 358 insertions(+), 9 deletions(-)
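For orientation, here is a minimal application-side sketch of driving the adapter this patch adds. It uses only generic rte_event_eth_tx_adapter APIs; the adapter id, device ids and queue numbers are placeholders and error handling is trimmed, so treat it as an illustration rather than part of the change.

```c
#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>
#include <rte_mbuf.h>

#define TXA_ID 0 /* placeholder adapter id */

/* One-time setup: create the adapter and bind all Tx queues of a port. */
static int
txa_setup(uint8_t ev_dev, uint16_t eth_port)
{
	struct rte_event_port_conf pconf;
	int rc;

	rc = rte_event_port_default_conf_get(ev_dev, 0, &pconf);
	if (rc)
		return rc;
	rc = rte_event_eth_tx_adapter_create(TXA_ID, ev_dev, &pconf);
	if (rc)
		return rc;
	/* -1 maps every Tx queue of the port, matching the tx_queue_id < 0
	 * loop in cnxk_sso_tx_adapter_queue_add() below.
	 */
	rc = rte_event_eth_tx_adapter_queue_add(TXA_ID, eth_port, -1);
	if (rc)
		return rc;
	return rte_event_eth_tx_adapter_start(TXA_ID);
}

/* Fast path: cnxk reports RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT, so a
 * worker hands mbufs straight to the adapter enqueue.
 */
static void
txa_send(uint8_t ev_dev, uint8_t ev_port, struct rte_mbuf *m, uint16_t txq)
{
	struct rte_event ev = {0};

	rte_event_eth_tx_adapter_txq_set(m, txq);
	ev.mbuf = m;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	rte_event_eth_tx_adapter_enqueue(ev_dev, ev_port, &ev, 1, 0);
}
```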
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c127..6fdccc2ab 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
- HW managed packets enqueued from ethdev to eventdev exposed through event eth
RX adapter.
- N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017..80ff93269 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
- * Added Rx adapter support for event/cnxk when the ethernet device requested is
- net/cnxk.
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
Removed Items
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 76613fe84..822c1900e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -200,6 +200,7 @@ struct roc_nix_sq {
uint64_t aura_handle;
int16_t nb_sqb_bufs_adj;
uint16_t nb_sqb_bufs;
+ uint16_t aura_sqb_bufs;
plt_iova_t io_addr;
void *lmt_addr;
void *sqe_mem;
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 0604e7a18..f69771c15 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
aura.fc_ena = 1;
aura.fc_addr = (uint64_t)sq->fc;
aura.fc_hyst_bits = 0; /* Store count on all updates */
- rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
+ rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
&pool);
if (rc)
goto fail;
- sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
+ sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
if (sq->sqe_mem == NULL) {
rc = NIX_ERR_NO_MEM;
goto nomem;
@@ -600,11 +600,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
/* Fill the initial buffers */
iova = (uint64_t)sq->sqe_mem;
- for (count = 0; count < nb_sqb_bufs; count++) {
+ for (count = 0; count < NIX_MAX_SQB; count++) {
roc_npa_aura_op_free(sq->aura_handle, 0, iova);
iova += blk_sz;
}
roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
+ sq->aura_sqb_bufs = NIX_MAX_SQB;
return rc;
nomem:
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff..8a9b04a3d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784..21f80323d 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static void
+cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id,
+ bool ena)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cn9k_eth_txq *txq;
+ struct roc_nix_sq *sq;
+ int i;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cn9k_sso_txq_fc_update(eth_dev, i, ena);
+ } else {
+ uint16_t sq_limit;
+
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq_limit =
+ ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) :
+ sq->nb_sqb_bufs;
+ txq->nb_sqb_bufs_adj =
+ sq_limit -
+ RTE_ALIGN_MUL_CEIL(sq_limit,
+ (1ULL << txq->sqes_per_sqb_log2)) /
+ (1ULL << txq->sqes_per_sqb_log2);
+ txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ }
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false);
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d033..24e1be6a9 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -34,6 +35,7 @@
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
#define CNXK_SSO_WQE_SG_PTR (9)
+#define CNXK_SSO_SQB_LIMIT (0x180)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
@@ -86,9 +88,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +120,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +148,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3b7ecb375..502da272d 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ return roc_npa_aura_limit_modify(
+ sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i);
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+ }
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* Re: [dpdk-dev] [PATCH v6 3/7] event/cnxk: add Tx adapter support
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-07-03 13:23 ` Nithin Dabilpuram
0 siblings, 0 replies; 93+ messages in thread
From: Nithin Dabilpuram @ 2021-07-03 13:23 UTC (permalink / raw)
To: pbhagavatula
Cc: jerinj, Shijith Thotton, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, dev
On Sat, Jul 03, 2021 at 02:44:04AM +0530, pbhagavatula@marvell.com wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add support for event eth Tx adapter.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> doc/guides/eventdevs/cnxk.rst | 4 +-
> doc/guides/rel_notes/release_21_08.rst | 6 +-
> drivers/common/cnxk/roc_nix.h | 1 +
> drivers/common/cnxk/roc_nix_queue.c | 7 +-
> drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++
> drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++
> drivers/event/cnxk/cnxk_eventdev.h | 22 +++-
> drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++
> 8 files changed, 358 insertions(+), 9 deletions(-)
>
> diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
> index b7e82c127..6fdccc2ab 100644
> --- a/doc/guides/eventdevs/cnxk.rst
> +++ b/doc/guides/eventdevs/cnxk.rst
> @@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
> - HW managed packets enqueued from ethdev to eventdev exposed through event eth
> RX adapter.
> - N:1 ethernet device Rx queue to Event queue mapping.
> -- Full Rx offload support defined through ethdev queue configuration.
> +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
> + capability while maintaining receive packet order.
> +- Full Rx/Tx offload support defined through ethdev queue configuration.
>
> Prerequisites and Compilation procedure
> ---------------------------------------
> diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
> index 3892c8017..80ff93269 100644
> --- a/doc/guides/rel_notes/release_21_08.rst
> +++ b/doc/guides/rel_notes/release_21_08.rst
> @@ -60,10 +60,10 @@ New Features
> * Added net/cnxk driver which provides the support for the integrated ethernet
> device.
>
> -* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
> +* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
>
> - * Added Rx adapter support for event/cnxk when the ethernet device requested is
> - net/cnxk.
> + * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
> + is net/cnxk.
>
>
> Removed Items
> diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
> index 76613fe84..822c1900e 100644
> --- a/drivers/common/cnxk/roc_nix.h
> +++ b/drivers/common/cnxk/roc_nix.h
> @@ -200,6 +200,7 @@ struct roc_nix_sq {
> uint64_t aura_handle;
> int16_t nb_sqb_bufs_adj;
> uint16_t nb_sqb_bufs;
> + uint16_t aura_sqb_bufs;
> plt_iova_t io_addr;
> void *lmt_addr;
> void *sqe_mem;
> diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
> index 0604e7a18..f69771c15 100644
> --- a/drivers/common/cnxk/roc_nix_queue.c
> +++ b/drivers/common/cnxk/roc_nix_queue.c
> @@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
> aura.fc_ena = 1;
> aura.fc_addr = (uint64_t)sq->fc;
> aura.fc_hyst_bits = 0; /* Store count on all updates */
> - rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
> + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
> &pool);
> if (rc)
> goto fail;
>
> - sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
> + sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
> if (sq->sqe_mem == NULL) {
> rc = NIX_ERR_NO_MEM;
> goto nomem;
> @@ -600,11 +600,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
>
> /* Fill the initial buffers */
> iova = (uint64_t)sq->sqe_mem;
> - for (count = 0; count < nb_sqb_bufs; count++) {
> + for (count = 0; count < NIX_MAX_SQB; count++) {
> roc_npa_aura_op_free(sq->aura_handle, 0, iova);
> iova += blk_sz;
> }
> roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
> + sq->aura_sqb_bufs = NIX_MAX_SQB;
Since the aura is now created with NIX_MAX_SQB buffers, don't we also need to
modify the aura limit here to sq->nb_sqb_bufs for poll mode?
With this fixed, Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> [...]
^ permalink raw reply [flat|nested] 93+ messages in thread
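The follow-up the reviewer asks for above is small: sqb_pool_populate() now always fills the aura with NIX_MAX_SQB buffers, so for plain poll-mode Tx the aura limit should be clamped back to the SQ's own depth before the function returns; the event Tx adapter can later raise it again through cnxk_sso_sqb_aura_limit_edit(). A sketch of that clamp (not the committed fix), appended to the tail of sqb_pool_populate() and reusing its locals; routing errors through the existing 'nomem' label is an assumption.

```c
	/* Sketch: after handing NIX_MAX_SQB buffers to the aura, restrict
	 * the aura limit to the poll-mode SQ depth.
	 */
	roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
	sq->aura_sqb_bufs = NIX_MAX_SQB;
	rc = roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs);
	if (rc)
		goto nomem;

	return rc;
```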
* [dpdk-dev] [PATCH v6 4/7] event/cnxk: add Tx adapter fastpath ops
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (3 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cnxk_worker.h | 27 +++---
drivers/event/cnxk/meson.build | 6 ++
12 files changed, 440 insertions(+), 14 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
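The dispatch machinery this patch leans on is easier to see in isolation: an X-macro list stamps out one specialised enqueue function per combination of offload flags (each body is compiled with constant flags, so dead branches drop out), and a small table indexed by `!!(tx_offloads & FLAG)` picks the variant at configuration time. A stripped-down, self-contained illustration with two invented flags in place of the six real ones:

```c
#include <stdint.h>
#include <stdio.h>

#define MY_OFFLOAD_CSUM (1U << 0)
#define MY_OFFLOAD_VLAN (1U << 1)

typedef uint16_t (*tx_enq_fn)(void *port, void *pkts, uint16_t nb);

/* X-macro list: name, vlan index bit, csum index bit, compile-time flags. */
#define MY_TX_MODES                                                           \
	T(none,      0, 0, 0)                                                  \
	T(csum,      0, 1, MY_OFFLOAD_CSUM)                                    \
	T(vlan,      1, 0, MY_OFFLOAD_VLAN)                                    \
	T(vlan_csum, 1, 1, MY_OFFLOAD_VLAN | MY_OFFLOAD_CSUM)

/* Stamp out one specialised function per mode; 'flags' is a constant in
 * each body, so the compiler removes the branches that cannot be taken.
 */
#define T(name, f1, f0, flags)                                                 \
	static uint16_t tx_enq_##name(void *port, void *pkts, uint16_t nb)     \
	{                                                                      \
		(void)port;                                                    \
		(void)pkts;                                                    \
		if ((flags) & MY_OFFLOAD_CSUM) { /* checksum-specific work */  \
		}                                                              \
		if ((flags) & MY_OFFLOAD_VLAN) { /* VLAN-specific work */      \
		}                                                              \
		return nb;                                                     \
	}
MY_TX_MODES
#undef T

/* The same list builds the [vlan][csum] dispatch table. */
static const tx_enq_fn tx_enq_tbl[2][2] = {
#define T(name, f1, f0, flags) [f1][f0] = tx_enq_##name,
	MY_TX_MODES
#undef T
};

int
main(void)
{
	uint64_t offloads = MY_OFFLOAD_VLAN; /* runtime configuration */
	tx_enq_fn fn = tx_enq_tbl[!!(offloads & MY_OFFLOAD_VLAN)]
				 [!!(offloads & MY_OFFLOAD_CSUM)];

	printf("sent %u packets\n", (unsigned int)fn(NULL, NULL, 4));
	return 0;
}
```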
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 000000000..f9968ac0d
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..a24fc42e5
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 21f80323d..a69edff19 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1d..3f9751211 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline void
+cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
+{
+ while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)
+ << (txq)->sqes_per_sqb_log2))
+ ;
+}
+
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Lets commit any changes in the packet here in case when
+ * fast free is set as no further changes will be made to mbuf.
+ * In case of fast free is not set, both cn9k_nix_prepare_mseg()
+ * and cn9k_nix_xmit_prepare() has a barrier after refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 000000000..92e2981f0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 000000000..dfb574cf9
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 000000000..3df649c0c
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..0efe29113
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae16..7891b749d 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -79,21 +79,20 @@ static __rte_always_inline void
cnxk_sso_hws_head_wait(uintptr_t tag_op)
{
#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
+ uint64_t tag;
+
+ asm volatile(" ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
+ /* Wait for the HEAD to be set */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8..13e0634e8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v6 5/7] event/cnxk: add Rx adapter vector support
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
` (2 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter, along with control
path APIs to query vector limits and to configure event vectorization on a
given Rx queue.
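For context, a minimal application-side sketch of how this control path is
meant to be exercised (assuming the experimental 21.08 Rx adapter vector API;
the adapter/eventdev IDs, the mempool and the chosen size/timeout values are
hypothetical, and the queue is assumed to have been added with the
RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR flag):

#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_mempool.h>

static int
app_enable_rx_vectorization(uint8_t adptr_id, uint8_t evdev_id,
			    uint16_t eth_port, int32_t rx_queue,
			    struct rte_mempool *vec_pool)
{
	struct rte_event_eth_rx_adapter_event_vector_config conf;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	int rc;

	/* Query the vector size/timeout range supported by the PMD. */
	rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
							&limits);
	if (rc)
		return rc;

	/* Pick values inside the advertised limits. */
	memset(&conf, 0, sizeof(conf));
	conf.vector_sz = limits.max_sz;
	conf.vector_timeout_ns = limits.min_timeout_ns;
	conf.vector_mp = vec_pool;

	/* Enable event vectorization on the given Rx queue. */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		adptr_id, eth_port, rx_queue, &conf);
}

When limits.log2_sz is set, as it is for this driver, the chosen vector_sz
has to be a power of two within the reported range.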
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 2 +
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
5 files changed, 135 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab..0297cd3d5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
capability while maintaining receive packet order.
- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev configurable per each Rx queue in Rx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c..e85fa4785 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 24e1be6a9..fc49b88d6 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -97,6 +97,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 502da272d..baf2f2aa6 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead0390..2528b3cda 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
* [dpdk-dev] [PATCH v6 6/7] event/cnxk: add Rx event vector fastpath
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 7/7] event/cnxk: add Tx " pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add the Rx event vector fastpath to convert HW-defined metadata into
rte_mbuf and rte_event_vector.
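As a rough illustration of what a worker core then receives, a minimal
consumer-side sketch (assuming the generic rte_event_vector API; freeing the
mbufs merely stands in for real packet processing):

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
app_handle_event(struct rte_event *ev)
{
	if (ev->event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
		struct rte_event_vector *vec = ev->vec;
		uint16_t i;

		/* mbufs[] have already been filled in by the vector
		 * fastpath (ol_flags, ptype, timestamp, segments).
		 */
		for (i = 0; i < vec->nb_elem; i++)
			rte_pktmbuf_free(vec->mbufs[i]);

		/* The vector container itself is a mempool object and must
		 * be returned once its mbufs are consumed.
		 */
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
	}
}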
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/rel_notes/release_21_08.rst | 1 +
drivers/event/cnxk/cn10k_worker.h | 56 +++++++
drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
5 files changed, 179 insertions(+), 85 deletions(-)
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269..11ccc9bcb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
+ * Add support for event vectorization for Rx adapter.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c8500..7a48a6b17 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e..a506a867c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f..166735ad5 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c8..1f44ddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
* [dpdk-dev] [PATCH v6 7/7] event/cnxk: add Tx event vector fastpath
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-07-02 21:14 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-02 21:14 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add the Tx event vector fastpath and integrate the event vector Tx routine
into the Tx burst functions.
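For reference, a minimal sketch of handing a received vector over to the Tx
adapter (assuming the generic rte_event_vector and Tx adapter APIs with the
internal port capability; the eventdev/port IDs and the destination
port/queue are hypothetical):

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

static void
app_tx_vector(uint8_t evdev_id, uint8_t ev_port, struct rte_event *ev,
	      uint16_t dst_eth_port, uint16_t dst_tx_queue)
{
	struct rte_event_vector *vec = ev->vec;

	/* All mbufs in this vector target the same port/queue, so mark the
	 * attributes valid and let the driver take the single-queue path;
	 * otherwise it looks up the queue per mbuf via
	 * rte_event_eth_tx_adapter_txq_get().
	 */
	vec->attr_valid = 1;
	vec->port = dst_eth_port;
	vec->queue = dst_tx_queue;

	/* ev->event_type already carries the vector bit (e.g.
	 * RTE_EVENT_TYPE_ETHDEV_VECTOR for vectors from the Rx adapter).
	 */
	ev->op = RTE_EVENT_OP_FORWARD;
	rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, ev, 1, 0);
}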
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 1 +
doc/guides/rel_notes/release_21_08.rst | 2 +-
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
12 files changed, 167 insertions(+), 53 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
- Full Rx/Tx offload support defined through ethdev queue configuration.
- HW managed event vectorization on CN10K for packets enqueued from ethdev to
eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
- * Add support for event vectorization for Rx adapter.
+ * Add support for event vectorization for Rx/Tx adapter.
Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8..316c6ccd5 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785..6f37c5bd2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17..9cc099206 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 3f9751211..cc1e14195 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 7891b749d..9f9ceab8a 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t tag;
-
- asm volatile(" ldr %[tag], [%[tag_op]] \n"
- " tbnz %[tag], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[tag], [%[tag_op]] \n"
- " tbz %[tag], 35, rty%= \n"
- "done%=: \n"
- : [tag] "=&r"(tag)
- : [tag_op] "r"(tag_op));
-#else
- /* Wait for the HEAD to be set */
- while (!(plt_read64(tag_op) & BIT_ULL(35)))
- ;
-#endif
-}
-
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59..0e1276c60 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index eb148b8e7..f75cae07a 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f675472..4ea4c8a4e 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e373750..a0350496a 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dba..7f98f79b9 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
* [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (5 preceding siblings ...)
2021-07-02 21:14 ` [dpdk-dev] [PATCH v6 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (6 more replies)
6 siblings, 7 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella,
Neil Horman
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for the event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit within a 64B cache line.
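For context, a minimal application-side sketch of wiring an ethdev to the
event device through the Rx adapter (assuming the generic
rte_event_eth_rx_adapter API; the IDs, scheduling type and port configuration
are hypothetical):

#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>

static int
app_setup_rx_adapter(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port,
		     uint8_t ev_queue, struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	int rc;

	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, port_conf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.event_type = RTE_EVENT_TYPE_ETHDEV;

	/* -1 connects all Rx queues of the ethernet device. */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adptr_id);
}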
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v7 Changes:
- Set correct limits for SQB aura.
v6 Changes:
- More code cleanup.
- Fix incorrect SQB configuration and missing fc check.
v5 Changes:
- Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 28 ++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 522 insertions(+), 47 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+ RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +97,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when same mempool is used across ethernet device
+ connected to event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device causes back pressure to be asserted only on the first
+ethernet device.
+Back pressure is automatically disabled when the same mempool is used for all
+the ethernet devices connected to the event device; to override this,
+applications can use the `force_rx_bp=1` device argument.
+Using a unique mempool per ethernet device is recommended when the devices
+are connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
+* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+
+ * Added Rx adapter support for event/cnxk when the ethernet device requested is
+ net/cnxk.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb6902795..76613fe84 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f..f17eba416 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df44..9dc0c88a6 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8a5c839e5..cb1ce4b6f 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -29,6 +29,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76..2060c8fe8 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c6..5dbae275b 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd..c7250bf9e 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417ea..072800c24 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb..f5a440146 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a7..cfd7fb971 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62..b65d725f5 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c1..3b7ecb375 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev,
+ i, queue_conf);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ roc_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a..eda562f5b 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
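
Aside (not part of the diff): the force_rx_bp devarg registered above is
passed on the SSO device's EAL device arguments, in the same way as the
existing SSO devargs. A hypothetical bring-up, with a placeholder PCI
address for the SSO device:

#include <rte_eal.h>

static int
init_with_force_bp(void)
{
	/* "-a <sso-bdf>,force_rx_bp=1" allows the SSO device and sets the
	 * force flag used by the NPA backpressure configuration done when
	 * Rx adapter queues are added or deleted. */
	char *argv[] = { "app", "-a", "0002:0e:00.0,force_rx_bp=1", NULL };

	return rte_eal_init(3, argv);
}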
--
2.17.1
* [dpdk-dev] [PATCH v7 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (5 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
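For context (this snippet is not part of the diff below): the per-offload
dequeue variants added here are only exercised once an application binds
its ethdev Rx queues to the event device through the Rx adapter API. A
minimal, generic sketch of that application-side setup follows; the device
ids, event queue id and scheduling type are placeholders, not values
mandated by this driver.

#include <string.h>
#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
bind_rx_queues_to_sso(uint8_t evdev_id, uint16_t ethdev_id)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	const uint8_t adapter_id = 0;
	int rc;

	rc = rte_event_port_default_conf_get(evdev_id, 0, &pconf);
	if (rc)
		return rc;

	/* event/cnxk advertises the INTERNAL_PORT capability (previous
	 * patch), so no service core is involved; the NIX RQ pushes work
	 * straight into the SSO. */
	rc = rte_event_eth_rx_adapter_create(adapter_id, evdev_id, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = 0;			/* target event queue */
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;

	/* rx_queue_id of -1 binds every Rx queue of the port. */
	rc = rte_event_eth_rx_adapter_queue_add(adapter_id, ethdev_id, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adapter_id);
}

With this in place, rte_event_dequeue_burst() on a worker port returns
RTE_EVENT_TYPE_ETHDEV events whose u64 already points at a fully formed
mbuf; the wqe-to-mbuf conversion in the dequeue paths below is what
produces that mbuf.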
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe8..ba7d95fff 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275b..c71aa3732 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e..b724083ca 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
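As an aside (not in this diff): NIX_RX_FASTPATH_MODES is the X-macro list
defined in the net/cnxk Rx headers, and each R() definition above stamps
out one dequeue variant per Rx offload combination. A stripped-down sketch
of the pattern, with made-up names and a single flag column, assuming
nothing about the real flag list:

#include <stdint.h>

/* Illustrative mode list: one entry per generated variant. */
#define EXAMPLE_MODES                                                         \
	R(plain, 0)                                                           \
	R(rss, 1)

/* Instantiate one function per mode. */
#define R(name, f0)                                                           \
	static uint16_t example_deq_##name(void *port)                        \
	{                                                                     \
		(void)port;                                                   \
		return f0;                                                    \
	}
EXAMPLE_MODES
#undef R

/* Build a flag-indexed dispatch table, like sso_hws_deq[...][...] in
 * cn10k_sso_fp_fns_set(), and pick the variant once at configure time. */
static uint16_t (*const example_deq_fns[2])(void *) = {
#define R(name, f0) [f0] = example_deq_##name,
	EXAMPLE_MODES
#undef R
};

Generating the variants this way moves the offload-flag checks out of the
per-packet path: the cost is paid once when the function pointer is
selected, not on every dequeue.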
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 000000000..36ec454cc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 000000000..29ecc551c
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 000000000..c8524a27b
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c24..e386cb784 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98d..538bc4b0b 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a440146..c01c00e1d 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extract the timestamp if PTP is enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 000000000..51ccaf4ec
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 000000000..4e2801459
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 000000000..9713d1ef0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 000000000..709fa2d9e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 000000000..d50e1cf83
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 000000000..a0508fdf0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f5..9d5d2d033 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b..c5c1c0ee8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v7 3/7] event/cnxk: add Tx adapter support
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (4 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
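As a usage sketch (application code, not part of this patch): ethdev Tx
queues are bound to the SSO-backed adapter through the generic
rte_event_eth_tx_adapter API. The adapter id, event device id and ethdev
port below are placeholders.

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

static int
app_setup_tx_adapter(uint8_t evdev_id, uint16_t eth_port)
{
	struct rte_event_port_conf port_conf;
	const uint8_t adptr_id = 0;
	int rc;

	/* Reuse the event device's default port config for the adapter. */
	rc = rte_event_port_default_conf_get(evdev_id, 0, &port_conf);
	if (rc)
		return rc;

	rc = rte_event_eth_tx_adapter_create(adptr_id, evdev_id, &port_conf);
	if (rc)
		return rc;

	/* -1 adds all Tx queues of the ethdev; in this patch
	 * cnxk_sso_tx_adapter_queue_add() then caps each SQ's SQB aura and
	 * records the txq pointer per port/queue for the fast path.
	 */
	rc = rte_event_eth_tx_adapter_queue_add(adptr_id, eth_port, -1);
	if (rc)
		return rc;

	return rte_event_eth_tx_adapter_start(adptr_id);
}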
doc/guides/eventdevs/cnxk.rst | 4 +-
doc/guides/rel_notes/release_21_08.rst | 6 +-
drivers/common/cnxk/roc_nix.h | 1 +
drivers/common/cnxk/roc_nix_queue.c | 8 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 22 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++
8 files changed, 359 insertions(+), 9 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c127..6fdccc2ab 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
- HW managed packets enqueued from ethdev to eventdev exposed through event eth
RX adapter.
- N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017..80ff93269 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
* Added net/cnxk driver which provides the support for the integrated ethernet
device.
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
- * Added Rx adapter support for event/cnxk when the ethernet device requested is
- net/cnxk.
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
Removed Items
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 76613fe84..822c1900e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -200,6 +200,7 @@ struct roc_nix_sq {
uint64_t aura_handle;
int16_t nb_sqb_bufs_adj;
uint16_t nb_sqb_bufs;
+ uint16_t aura_sqb_bufs;
plt_iova_t io_addr;
void *lmt_addr;
void *sqe_mem;
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 0604e7a18..7e2f86eca 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
aura.fc_ena = 1;
aura.fc_addr = (uint64_t)sq->fc;
aura.fc_hyst_bits = 0; /* Store count on all updates */
- rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
+ rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
&pool);
if (rc)
goto fail;
- sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
+ sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
if (sq->sqe_mem == NULL) {
rc = NIX_ERR_NO_MEM;
goto nomem;
@@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
/* Fill the initial buffers */
iova = (uint64_t)sq->sqe_mem;
- for (count = 0; count < nb_sqb_bufs; count++) {
+ for (count = 0; count < NIX_MAX_SQB; count++) {
roc_npa_aura_op_free(sq->aura_handle, 0, iova);
iova += blk_sz;
}
roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
+ roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs);
+ sq->aura_sqb_bufs = NIX_MAX_SQB;
return rc;
nomem:
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff..8a9b04a3d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784..21f80323d 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static void
+cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id,
+ bool ena)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cn9k_eth_txq *txq;
+ struct roc_nix_sq *sq;
+ int i;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cn9k_sso_txq_fc_update(eth_dev, i, ena);
+ } else {
+ uint16_t sq_limit;
+
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq_limit =
+ ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) :
+ sq->nb_sqb_bufs;
+ txq->nb_sqb_bufs_adj =
+ sq_limit -
+ RTE_ALIGN_MUL_CEIL(sq_limit,
+ (1ULL << txq->sqes_per_sqb_log2)) /
+ (1ULL << txq->sqes_per_sqb_log2);
+ txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ }
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false);
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d033..24e1be6a9 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -34,6 +35,7 @@
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
#define CNXK_SSO_WQE_SG_PTR (9)
+#define CNXK_SSO_SQB_LIMIT (0x180)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
@@ -86,9 +88,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +120,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +148,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3b7ecb375..502da272d 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ return roc_npa_aura_limit_modify(
+ sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i);
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+ }
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
* [dpdk-dev] [PATCH v7 4/7] event/cnxk: add Tx adapter fastpath ops
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (3 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
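A worker-side sketch of how these fast-path ops get exercised (application
code, not part of the patch; ids and the Tx queue are placeholders): the
mbuf is tagged with the destination Tx queue and handed to
rte_event_eth_tx_adapter_enqueue(), which for this internal-port PMD
resolves to one of the cn9k/cn10k_sso_hws_tx_adptr_enq_* functions
registered below.

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_pause.h>

/* Forward one event's mbuf out of Tx queue 'txq'. */
static inline void
app_event_tx(uint8_t evdev_id, uint8_t ev_port, struct rte_event *ev,
	     uint16_t txq)
{
	rte_event_eth_tx_adapter_txq_set(ev->mbuf, txq);
	/* Retry until the single event is accepted. */
	while (rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, ev, 1, 0) !=
	       1)
		rte_pause();
}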
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cnxk_worker.h | 27 +++---
drivers/event/cnxk/meson.build | 6 ++
12 files changed, 440 insertions(+), 14 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 000000000..f9968ac0d
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..a24fc42e5
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 21f80323d..a69edff19 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1d..3f9751211 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline void
+cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
+{
+ while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)
+ << (txq)->sqes_per_sqb_log2))
+ ;
+}
+
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Lets commit any changes in the packet here in case when
+ * fast free is set as no further changes will be made to mbuf.
+ * In case of fast free is not set, both cn9k_nix_prepare_mseg()
+ * and cn9k_nix_xmit_prepare() has a barrier after refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 000000000..92e2981f0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 000000000..dfb574cf9
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 000000000..3df649c0c
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..0efe29113
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae16..7891b749d 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -79,21 +79,20 @@ static __rte_always_inline void
cnxk_sso_hws_head_wait(uintptr_t tag_op)
{
#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
+ uint64_t tag;
+
+ asm volatile(" ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
+ /* Wait for the HEAD to be set */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8..13e0634e8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v7 5/7] event/cnxk: add Rx adapter vector support
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
` (2 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter, along with
control path APIs to query vector limits and to configure event
vectorization on a given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
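For orientation, a control-path sketch using the experimental Rx adapter
vector API of this release, which this series plugs into; adapter/eventdev
ids and pool sizing are placeholders, and the config fields (vector_sz,
vector_timeout_ns, vector_mp) mirror the struct consumed by
cn10k_sso_rx_adapter_vector_config() in this patch.

#include <errno.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

static int
app_enable_rx_vectors(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port)
{
	struct rte_event_eth_rx_adapter_event_vector_config vec_conf;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	struct rte_mempool *vmp;
	int rc;

	rc = rte_event_eth_rx_adapter_vector_limits_get(evdev_id, eth_port,
							&limits);
	if (rc)
		return rc;

	/* Pool of rte_event_vector objects, each holding up to max_sz mbufs. */
	vmp = rte_event_vector_pool_create("rx_vec_pool", 8192, 0,
					   limits.max_sz, rte_socket_id());
	if (vmp == NULL)
		return -ENOMEM;

	memset(&vec_conf, 0, sizeof(vec_conf));
	vec_conf.vector_sz = limits.max_sz;
	vec_conf.vector_timeout_ns = limits.min_timeout_ns;
	vec_conf.vector_mp = vmp;

	/* -1 would apply the config to every Rx queue; 0 targets queue 0. */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		adptr_id, eth_port, 0, &vec_conf);
}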
doc/guides/eventdevs/cnxk.rst | 2 +
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
5 files changed, 135 insertions(+), 2 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab..0297cd3d5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
capability while maintaining receive packet order.
- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev configurable per each Rx queue in Rx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c..e85fa4785 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 24e1be6a9..fc49b88d6 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -97,6 +97,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 502da272d..baf2f2aa6 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead0390..2528b3cda 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v7 6/7] event/cnxk: add Rx event vector fastpath
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 7/7] event/cnxk: add Tx " pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Rx event vector fastpath to convert HW defined metadata into
rte_mbuf and rte_event_vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/rel_notes/release_21_08.rst | 1 +
drivers/event/cnxk/cn10k_worker.h | 56 +++++++
drivers/net/cnxk/cn10k_rx.h | 200 +++++++++++++++----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
5 files changed, 179 insertions(+), 85 deletions(-)
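For context, a minimal consumer-side sketch (not part of this patch) of how an
application could unpack the RTE_EVENT_TYPE_ETHDEV_VECTOR events this fastpath
produces; it assumes the 21.08-era rte_event_vector layout, and
process_packet() is a placeholder for application logic:

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Placeholder for application packet handling. */
static void
process_packet(struct rte_mbuf *m)
{
	rte_pktmbuf_free(m);
}

static void
worker_loop(uint8_t evdev_id, uint8_t ev_port)
{
	struct rte_event ev;
	uint16_t i;

	while (rte_event_dequeue_burst(evdev_id, ev_port, &ev, 1, 0)) {
		if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			struct rte_event_vector *vec = ev.vec;

			/* The fastpath fills vec->mbufs[0..nb_elem) from the
			 * vector WQE, with PTP timestamps already stored in
			 * the dynfield when the offload is enabled.
			 */
			for (i = 0; i < vec->nb_elem; i++)
				process_packet(vec->mbufs[i]);
			/* Vectors come from the mempool supplied at adapter
			 * configuration time; return them when done.
			 */
			rte_mempool_put(rte_mempool_from_obj(vec), vec);
		} else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
			process_packet(ev.mbuf);
		}
	}
}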
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269..11ccc9bcb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
+ * Add support for event vectorization for Rx adapter.
Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c8500..7a48a6b17 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e..a506a867c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 1, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 1, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 1, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 1, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f..166735ad5 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c8..1f44ddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v7 7/7] event/cnxk: add Tx event vector fastpath
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-07-03 22:00 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-03 22:00 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Tx event vector fastpath, integrate event vector Tx routine
into Tx burst.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
doc/guides/eventdevs/cnxk.rst | 1 +
doc/guides/rel_notes/release_21_08.rst | 2 +-
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
12 files changed, 167 insertions(+), 53 deletions(-)
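For context, a minimal producer-side sketch (not part of this patch) of how an
application could hand a vector of mbufs to the Tx adapter; it assumes the
21.08-era rte_event_vector layout with attr_valid/port/queue, and the ids are
placeholders:

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

static uint16_t
tx_vector_event(uint8_t evdev_id, uint8_t ev_port,
		struct rte_event_vector *vec, uint16_t eth_port, uint16_t txq)
{
	struct rte_event ev = {0};

	/* All mbufs in this vector target one port/queue, so mark the
	 * attributes valid; the fastpath then calls the vector Tx routine
	 * directly instead of splitting the vector per mbuf.
	 */
	vec->attr_valid = 1;
	vec->port = eth_port;
	vec->queue = txq;

	ev.event_type = RTE_EVENT_TYPE_VECTOR | RTE_EVENT_TYPE_ETHDEV;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.vec = vec;

	return rte_event_eth_tx_adapter_enqueue(evdev_id, ev_port, &ev, 1, 0);
}

When attr_valid is left clear, the handler below falls back to
cn10k_sso_vwqe_split_tx() and resolves the destination per mbuf from m->port
and rte_event_eth_tx_adapter_txq_get().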
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
- Full Rx/Tx offload support defined through ethdev queue configuration.
- HW managed event vectorization on CN10K for packets enqueued from ethdev to
eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
* Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
is net/cnxk.
- * Add support for event vectorization for Rx adapter.
+ * Add support for event vectorization for Rx/Tx adapter.
Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8..316c6ccd5 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785..6f37c5bd2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17..9cc099206 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 3f9751211..cc1e14195 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 7891b749d..9f9ceab8a 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t tag;
-
- asm volatile(" ldr %[tag], [%[tag_op]] \n"
- " tbnz %[tag], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[tag], [%[tag_op]] \n"
- " tbz %[tag], 35, rty%= \n"
- "done%=: \n"
- : [tag] "=&r"(tag)
- : [tag_op] "r"(tag_op));
-#else
- /* Wait for the HEAD to be set */
- while (!(plt_read64(tag_op) & BIT_ULL(35)))
- ;
-#endif
-}
-
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59..0e1276c60 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index eb148b8e7..f75cae07a 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f675472..4ea4c8a4e 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e373750..a0350496a 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dba..7f98f79b9 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (5 preceding siblings ...)
2021-07-03 22:00 ` [dpdk-dev] [PATCH v7 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (6 more replies)
6 siblings, 7 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter.
Resize cn10k workslot fastpath structure to fit in 64B cacheline size.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v8 Changes:
- Fix incorrect cq_w1 offset.
- Move doc changes to 1st patch.
v7 Changes:
- Set correct limits for SQB aura.
v6 Changes:
- More code cleanup.
- Fix incorrect SQB configuration and missing fc check.
v5 Changes:
- Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 33 +++++
doc/guides/rel_notes/release_21_08.rst | 5 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 527 insertions(+), 47 deletions(-)
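For context, a minimal application-side sketch (not part of this patch) of
binding ethdev Rx queues to an event queue through the adapter ops added
below; the adapter id and port/queue ids are placeholders and the calls are
the standard rte_event_eth_rx_adapter API:

#include <errno.h>
#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

#define RX_ADPTR_ID 0 /* placeholder adapter id */

static int
setup_rx_adapter(uint8_t evdev_id, uint16_t eth_port, uint8_t ev_queue)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	uint32_t caps;
	int rc;

	/* Assumes the event device is already configured. */
	rc = rte_event_port_default_conf_get(evdev_id, 0, &pconf);
	if (rc)
		return rc;
	rc = rte_event_eth_rx_adapter_create(RX_ADPTR_ID, evdev_id, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_caps_get(evdev_id, eth_port, &caps);
	if (rc)
		return rc;
	/* This sketch relies on the HW-managed internal port path reported
	 * by the caps_get op for net_cn9k/net_cn10k devices.
	 */
	if (!(caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT))
		return -ENOTSUP;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.event_type = RTE_EVENT_TYPE_ETHDEV;

	/* rx_queue_id of -1 binds every Rx queue of the port. */
	rc = rte_event_eth_rx_adapter_queue_add(RX_ADPTR_ID, eth_port, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(RX_ADPTR_ID);
}

Internally, the queue_add op added here validates the driver name, programs
each RQ to inject work into SSO via cnxk_sso_rxq_enable() and grows the XAQ
pool based on the Rx mempool size through cnxk_sso_updt_xae_cnt().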
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800cc..53560d3830 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,15 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+ RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +102,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when the same mempool is used across the ethernet
+ devices connected to the event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +178,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to
+event device would cause back pressure to be asserted only on the first
+ethernet device.
+Back pressure is automatically disabled when the same mempool is used for all
+the ethernet devices connected to the event device; to override this,
+applications can use the `force_rx_bp=1` device argument.
+Using unique mempool per each ethernet device is recommended when they are
+connected to event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 6a902ef9ac..de5ce86c04 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -117,6 +117,11 @@ New Features
The experimental PMD power management API now supports managing
multiple Ethernet Rx queues per lcore.
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
+ * Add support for event vectorization for Rx/Tx adapter.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb69027956..76613fe84e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f8..f17eba4169 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df442..9dc0c88a6f 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index e3af48c02e..8ea3e9f439 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -83,6 +83,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76c..2060c8fe84 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c64..5dbae275ba 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd5..c7250bf9e7 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417eab..072800c243 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb6..f5a4401465 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a79..cfd7fb971c 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62a..b65d725f55 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c14..3b7ecb375a 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev,
+ i, queue_conf);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
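The queue add/del and start/stop ops above are the PMD half of the public event
eth Rx adapter API. A minimal application-side sketch of how they end up being
exercised (adapter id 0 and the ev_id/eth_port/ev_qid names are illustrative,
not part of this patch):

  #include <rte_eventdev.h>
  #include <rte_event_eth_rx_adapter.h>

  static int
  setup_rx_adapter(uint8_t ev_id, uint16_t eth_port, uint8_t ev_qid,
                   struct rte_event_port_conf *port_conf)
  {
          struct rte_event_eth_rx_adapter_queue_conf qconf = {0};
          uint32_t caps;
          int rc;

          /* Query adapter capabilities for this eventdev/ethdev pair. */
          rc = rte_event_eth_rx_adapter_caps_get(ev_id, eth_port, &caps);
          if (rc)
                  return rc;

          rc = rte_event_eth_rx_adapter_create(0, ev_id, port_conf);
          if (rc)
                  return rc;

          /* Deliver packets from all Rx queues (-1) to ev_qid as ATOMIC
           * events; this is what lands in cnxk_sso_rx_adapter_queue_add().
           */
          qconf.ev.queue_id = ev_qid;
          qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
          qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
          rc = rte_event_eth_rx_adapter_queue_add(0, eth_port, -1, &qconf);
          if (rc)
                  return rc;

          return rte_event_eth_rx_adapter_start(0);
  }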
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a9..eda562f5b5 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1

* [dpdk-dev] [PATCH v8 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (5 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations: dequeue functions
are generated per Rx offload flag combination and the matching variant is
selected at runtime from the ethdev Rx offloads.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
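The cn10k/cn9k fp_fns_set() hunks below replace the single dequeue handler with
a lookup into a [2][2][2][2][2][2] table, one axis per Rx offload flag, each
index produced by !!(rx_offloads & FLAG). A self-contained sketch of the idiom
with generic names (the flags and handlers here are illustrative, not the
driver's NIX_RX_* set):

  #include <stdio.h>

  typedef void (*deq_fn_t)(void);

  static void deq_plain(void)    { puts("plain"); }
  static void deq_rss(void)      { puts("rss"); }
  static void deq_csum(void)     { puts("csum"); }
  static void deq_rss_csum(void) { puts("rss+csum"); }

  #define OFF_RSS  (1u << 0) /* illustrative flag values */
  #define OFF_CSUM (1u << 1)

  /* One table axis per offload flag; !! collapses each flag test to 0 or 1. */
  static deq_fn_t
  pick_deq(unsigned int offloads)
  {
          static const deq_fn_t tbl[2][2] = {
                  /* [csum][rss] */
                  { deq_plain, deq_rss },
                  { deq_csum,  deq_rss_csum },
          };

          return tbl[!!(offloads & OFF_CSUM)][!!(offloads & OFF_RSS)];
  }

  int
  main(void)
  {
          pick_deq(OFF_RSS | OFF_CSUM)(); /* prints "rss+csum" */
          return 0;
  }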
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe84..ba7d95fff7 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275ba..c71aa37327 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e7..b724083caa 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 0000000000..36ec454ccc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
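Each new cn*_worker_deq*.c file instantiates one dequeue function per offload
combination by defining R() locally and expanding NIX_RX_FASTPATH_MODES. A
reduced sketch of that X-macro technique, using a made-up mode list rather than
the driver's real one:

  #include <stdio.h>

  /* Stand-in for NIX_RX_FASTPATH_MODES: each entry is a name plus its flags. */
  #define FASTPATH_MODES \
          R(plain, 0) \
          R(rss, 0x1) \
          R(rss_csum, 0x3)

  /* Expansion 1: one function definition per entry, as the worker_deq*.c
   * files do. */
  #define R(name, flags) \
          static int deq_##name(void) { return (int)(flags); }

  FASTPATH_MODES
  #undef R

  int
  main(void)
  {
          /* A second expansion elsewhere (as in the worker headers and
           * fp_fns_set()) can emit prototypes or tables from the same list. */
          printf("%d %d %d\n", deq_plain(), deq_rss(), deq_rss_csum());
          return 0;
  }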
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 0000000000..29ecc551cf
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 0000000000..c8524a27bd
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c243..e386cb784a 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98dd..538bc4b0b3 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a4401465..c01c00e1da 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 0000000000..51ccaf4ec4
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 0000000000..4e2801459b
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 0000000000..9713d1ef00
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 0000000000..709fa2d9ef
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 0000000000..d50e1cf83f
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 0000000000..a0508fdf0d
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags | NIX_RX_MULTI_SEG_F, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags | NIX_RX_MULTI_SEG_F, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f55..9d5d2d0339 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b5..c5c1c0ee8e 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v8 3/7] event/cnxk: add Tx adapter support
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (4 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
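For reference, a minimal usage sketch (not part of this patch) of how an application is expected to bind a cnxk port to the event eth Tx adapter and reach the queue_add/start paths added here, using the generic rte_event_eth_tx_adapter API; adptr_id, dev_id, eth_port and port_conf are placeholders:
#include <rte_event_eth_tx_adapter.h>

static int
setup_tx_adapter(uint8_t adptr_id, uint8_t dev_id, uint16_t eth_port,
                 struct rte_event_port_conf *port_conf)
{
        uint32_t caps = 0;
        int rc;

        /* cn9k/cn10k report RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT. */
        rc = rte_event_eth_tx_adapter_caps_get(dev_id, eth_port, &caps);
        if (rc)
                return rc;

        rc = rte_event_eth_tx_adapter_create(adptr_id, dev_id, port_conf);
        if (rc)
                return rc;

        /* -1 adds all Tx queues of the port; this lands in
         * cnxk_sso_tx_adapter_queue_add() introduced by this patch.
         */
        rc = rte_event_eth_tx_adapter_queue_add(adptr_id, eth_port, -1);
        if (rc)
                return rc;

        return rte_event_eth_tx_adapter_start(adptr_id);
}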
---
drivers/common/cnxk/roc_nix.h | 1 +
drivers/common/cnxk/roc_nix_queue.c | 8 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 22 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++
6 files changed, 353 insertions(+), 5 deletions(-)
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 76613fe84e..822c1900e2 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -200,6 +200,7 @@ struct roc_nix_sq {
uint64_t aura_handle;
int16_t nb_sqb_bufs_adj;
uint16_t nb_sqb_bufs;
+ uint16_t aura_sqb_bufs;
plt_iova_t io_addr;
void *lmt_addr;
void *sqe_mem;
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 0604e7a18e..7e2f86eca7 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
aura.fc_ena = 1;
aura.fc_addr = (uint64_t)sq->fc;
aura.fc_hyst_bits = 0; /* Store count on all updates */
- rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
+ rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
&pool);
if (rc)
goto fail;
- sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
+ sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
if (sq->sqe_mem == NULL) {
rc = NIX_ERR_NO_MEM;
goto nomem;
@@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
/* Fill the initial buffers */
iova = (uint64_t)sq->sqe_mem;
- for (count = 0; count < nb_sqb_bufs; count++) {
+ for (count = 0; count < NIX_MAX_SQB; count++) {
roc_npa_aura_op_free(sq->aura_handle, 0, iova);
iova += blk_sz;
}
roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
+ roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs);
+ sq->aura_sqb_bufs = NIX_MAX_SQB;
return rc;
nomem:
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff7..8a9b04a3db 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784a..21f80323d9 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static void
+cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id,
+ bool ena)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cn9k_eth_txq *txq;
+ struct roc_nix_sq *sq;
+ int i;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cn9k_sso_txq_fc_update(eth_dev, i, ena);
+ } else {
+ uint16_t sq_limit;
+
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq_limit =
+ ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) :
+ sq->nb_sqb_bufs;
+ txq->nb_sqb_bufs_adj =
+ sq_limit -
+ RTE_ALIGN_MUL_CEIL(sq_limit,
+ (1ULL << txq->sqes_per_sqb_log2)) /
+ (1ULL << txq->sqes_per_sqb_log2);
+ txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ }
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false);
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d0339..24e1be6a97 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -34,6 +35,7 @@
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
#define CNXK_SSO_WQE_SG_PTR (9)
+#define CNXK_SSO_SQB_LIMIT (0x180)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
@@ -86,9 +88,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +120,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +148,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3b7ecb375a..502da272d8 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ return roc_npa_aura_limit_modify(
+ sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i);
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+ }
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
* [dpdk-dev] [PATCH v8 4/7] event/cnxk: add Tx adapter fastpath ops
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (3 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
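As a usage sketch (not part of this patch): once these fastpath ops are installed, a worker submits packets through the adapter as below. dev_id, ev_port, eth_port and txq_id are placeholders, and the rte_event_eth_tx_adapter_enqueue() variant taking a flags argument is assumed, with flags left at 0:
#include <rte_event_eth_tx_adapter.h>
#include <rte_mbuf.h>

static inline uint16_t
tx_via_adapter(uint8_t dev_id, uint8_t ev_port, uint16_t eth_port,
               uint16_t txq_id, struct rte_mbuf *m)
{
        struct rte_event ev = {0};

        /* The PMD reads the destination Tx queue back from the mbuf in
         * cn9k/cn10k_sso_hws_xtract_meta().
         */
        m->port = eth_port;
        rte_event_eth_tx_adapter_txq_set(m, txq_id);

        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
        ev.mbuf = m;

        /* Dispatches to the txa_enqueue handler selected in
         * cn*_sso_fp_fns_set() from the Tx offload flags.
         */
        return rte_event_eth_tx_adapter_enqueue(dev_id, ev_port, &ev, 1, 0);
}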
---
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cnxk_worker.h | 27 +++---
drivers/event/cnxk/meson.build | 6 ++
12 files changed, 440 insertions(+), 14 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3db..e462f770c5 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083caa..3c90c85009 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 0000000000..f9968ac0d0
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..a24fc42e5a
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 21f80323d9..a69edff195 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1da..3f9751211a 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline void
+cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
+{
+ while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)
+ << (txq)->sqes_per_sqb_log2))
+ ;
+}
+
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+ /* Commit any changes to the packet here when fast free is set,
+ * as no further changes will be made to the mbuf. When fast free
+ * is not set, both cn9k_nix_prepare_mseg() and
+ * cn9k_nix_xmit_prepare() have a barrier after the refcnt update.
+ */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 0000000000..92e2981f02
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 0000000000..dfb574cf95
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 0000000000..3df649c0c8
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..0efe29113e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae162..7891b749df 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -79,21 +79,20 @@ static __rte_always_inline void
cnxk_sso_hws_head_wait(uintptr_t tag_op)
{
#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
+ uint64_t tag;
+
+ asm volatile(" ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
+ /* Wait for the HEAD to be set */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8e..13e0634e86 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v8 5/7] event/cnxk: add Rx adapter vector support
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
` (2 subsequent siblings)
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support for the cnxk event Rx adapter: add control path
APIs to get vector limits and to configure event vectorization on a
given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
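A minimal control-path sketch (not part of this patch), assuming the 21.08-era rte_event_eth_rx_adapter vector API (vector_limits_get plus queue_event_vector_config); adptr_id, dev_id, eth_port, rxq_id and vec_pool are placeholders, and the Rx queue is assumed to already be added to the Rx adapter:
#include <rte_event_eth_rx_adapter.h>

static int
enable_rx_vectors(uint8_t adptr_id, uint8_t dev_id, uint16_t eth_port,
                  int32_t rxq_id, struct rte_mempool *vec_pool)
{
        struct rte_event_eth_rx_adapter_event_vector_config vec_conf = {0};
        struct rte_event_eth_rx_adapter_vector_limits limits;
        int rc;

        rc = rte_event_eth_rx_adapter_vector_limits_get(dev_id, eth_port,
                                                        &limits);
        if (rc)
                return rc;

        vec_conf.vector_sz = limits.max_sz;
        vec_conf.vector_timeout_ns = limits.min_timeout_ns;
        vec_conf.vector_mp = vec_pool;

        /* Reaches cn10k_sso_rx_adapter_vector_config() added here. */
        return rte_event_eth_rx_adapter_queue_event_vector_config(
                adptr_id, eth_port, rxq_id, &vec_conf);
}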
---
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
4 files changed, 133 insertions(+), 2 deletions(-)
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c5..e85fa4785d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 24e1be6a97..fc49b88d6f 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -97,6 +97,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 502da272d8..baf2f2aa6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead03905..2528b3cdaa 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
* [dpdk-dev] [PATCH v8 6/7] event/cnxk: add Rx event vector fastpath
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 7/7] event/cnxk: add Tx " pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Rx event vector fastpath to convert HW-defined metadata into
rte_mbuf and rte_event_vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
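A minimal worker-side sketch (not part of this patch) of consuming the RTE_EVENT_TYPE_ETHDEV_VECTOR events this fastpath produces; dev_id and ev_port are placeholders, and packets are simply freed where real processing would go:
#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
drain_events(uint8_t dev_id, uint8_t ev_port)
{
        struct rte_event ev;
        uint16_t i;

        while (rte_event_dequeue_burst(dev_id, ev_port, &ev, 1, 0)) {
                if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
                        rte_pktmbuf_free(ev.mbuf);
                        continue;
                }
                if (ev.event_type != RTE_EVENT_TYPE_ETHDEV_VECTOR)
                        continue;

                /* cn10k_process_vwqe() has already rewritten the vector
                 * entries from CQE pointers to rte_mbuf pointers.
                 */
                for (i = 0; i < ev.vec->nb_elem; i++)
                        rte_pktmbuf_free(ev.vec->mbufs[i]);

                /* Return the rte_event_vector to its mempool. */
                rte_mempool_put(rte_mempool_from_obj(ev.vec), ev.vec);
        }
}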
---
drivers/event/cnxk/cn10k_worker.h | 56 ++++++++
drivers/net/cnxk/cn10k_rx.h | 200 ++++++++++++++++-----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
4 files changed, 178 insertions(+), 85 deletions(-)
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c85009..7a48a6b17d 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+ /* Extracting tstamp, if PTP enabled*/
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e7..4c5288b2cc 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+ /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f9..166735ad59 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c82..1f44dddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
* [dpdk-dev] [PATCH v8 7/7] event/cnxk: add Tx event vector fastpath
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-07-11 23:29 ` pbhagavatula
2021-07-13 13:36 ` Jerin Jacob
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
6 siblings, 1 reply; 93+ messages in thread
From: pbhagavatula @ 2021-07-11 23:29 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Tx event vector fastpath, integrate event vector Tx routine
into Tx burst.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
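For orientation: the Tx fast path below keys off the first 64-bit word of
struct rte_event_vector, i.e. whether attr_valid marks a single (port, queue)
destination for all mbufs or the vector has to be split per mbuf. A rough
sketch of that dispatch decision, written against the public rte_event_vector
fields rather than the packed metadata word the driver reads; the helper name
is illustrative only and not part of this patch:

#include <stdbool.h>
#include <rte_eventdev.h>

/* Hypothetical helper: decide whether a Tx event vector can go to one
 * Tx queue as a single burst, mirroring the attr_valid check below.
 */
static inline bool
tx_vec_single_dest(const struct rte_event *ev, uint16_t *port,
		   uint16_t *queue, uint16_t *nb_pkts)
{
	const struct rte_event_vector *vec;

	if (!(ev->event_type & RTE_EVENT_TYPE_VECTOR))
		return false;	/* plain mbuf event, scalar Tx path */

	vec = ev->vec;
	*nb_pkts = vec->nb_elem;
	if (!vec->attr_valid)
		return false;	/* mixed ports/queues: split per mbuf */

	*port = vec->port;	/* one Tx queue for the whole burst */
	*queue = vec->queue;
	return true;
}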
---
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++-----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
10 files changed, 165 insertions(+), 52 deletions(-)
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8a..316c6ccd59 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785d..6f37c5bd23 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17d..9cc0992063 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 3f9751211a..cc1e141957 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 7891b749df..9f9ceab8a1 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t tag;
-
- asm volatile(" ldr %[tag], [%[tag_op]] \n"
- " tbnz %[tag], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[tag], [%[tag_op]] \n"
- " tbz %[tag], 35, rty%= \n"
- "done%=: \n"
- : [tag] "=&r"(tag)
- : [tag_op] "r"(tag_op));
-#else
- /* Wait for the HEAD to be set */
- while (!(plt_read64(tag_op) & BIT_ULL(35)))
- ;
-#endif
-}
-
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59a..0e1276c60b 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index eb148b8e77..f75cae07ae 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f6754722..4ea4c8a4e5 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e3737501..a0350496ab 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dbad..7f98f79b97 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
* Re: [dpdk-dev] [PATCH v8 7/7] event/cnxk: add Tx event vector fastpath
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-13 13:36 ` Jerin Jacob
0 siblings, 0 replies; 93+ messages in thread
From: Jerin Jacob @ 2021-07-13 13:36 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Shijith Thotton, dpdk-dev
On Mon, Jul 12, 2021 at 5:01 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add Tx event vector fastpath, integrate event vector Tx routine
> into Tx burst.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
* Added Rx/Tx adapter support for event/cnxk when the ethernet
device requested
is net/cnxk.
Documentation build issue here
[2757/2784] Generating html_guides with a custom command
FAILED: doc/guides/html
/usr/bin/python ../buildtools/call-sphinx-build.py
/usr/bin/sphinx-build 21.08.0-rc1
/export/dpdk-next-eventdev/doc/guides
/export/dpdk-next-eventdev/build/doc/guides -W
Warning, treated as error:
/export/dpdk-next-eventdev/doc/guides/rel_notes/release_21_08.rst:122:Unexpected
indentation.
> ---
> drivers/common/cnxk/roc_sso.h | 23 ++++++
> drivers/event/cnxk/cn10k_eventdev.c | 3 +-
> drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++--
> drivers/event/cnxk/cn9k_worker.h | 4 +-
> drivers/event/cnxk/cnxk_worker.h | 22 ------
> drivers/net/cnxk/cn10k_tx.c | 2 +-
> drivers/net/cnxk/cn10k_tx.h | 52 +++++++++-----
> drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
> drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
> drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
> 10 files changed, 165 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
> index a6030e7d8a..316c6ccd59 100644
> --- a/drivers/common/cnxk/roc_sso.h
> +++ b/drivers/common/cnxk/roc_sso.h
> @@ -44,6 +44,29 @@ struct roc_sso {
> uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
> } __plt_cache_aligned;
>
> +static __rte_always_inline void
> +roc_sso_hws_head_wait(uintptr_t tag_op)
> +{
> +#ifdef RTE_ARCH_ARM64
> + uint64_t tag;
> +
> + asm volatile(PLT_CPU_FEATURE_PREAMBLE
> + " ldr %[tag], [%[tag_op]] \n"
> + " tbnz %[tag], 35, done%= \n"
> + " sevl \n"
> + "rty%=: wfe \n"
> + " ldr %[tag], [%[tag_op]] \n"
> + " tbz %[tag], 35, rty%= \n"
> + "done%=: \n"
> + : [tag] "=&r"(tag)
> + : [tag_op] "r"(tag_op));
> +#else
> + /* Wait for the SWTAG/SWTAG_FULL operation */
> + while (!(plt_read64(tag_op) & BIT_ULL(35)))
> + ;
> +#endif
> +}
> +
> /* SSO device initialization */
> int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
> int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
> diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
> index e85fa4785d..6f37c5bd23 100644
> --- a/drivers/event/cnxk/cn10k_eventdev.c
> +++ b/drivers/event/cnxk/cn10k_eventdev.c
> @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
> if (ret)
> *caps = 0;
> else
> - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
> + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
> + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
>
> return 0;
> }
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index 7a48a6b17d..9cc0992063 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
> NIX_RX_FASTPATH_MODES
> #undef R
>
> -static __rte_always_inline const struct cn10k_eth_txq *
> +static __rte_always_inline struct cn10k_eth_txq *
> cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
> const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
> {
> - return (const struct cn10k_eth_txq *)
> + return (struct cn10k_eth_txq *)
> txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
> }
>
> +static __rte_always_inline void
> +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
> + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
> + uint8_t sched_type, uintptr_t base,
> + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> + const uint32_t flags)
> +{
> + uint16_t port[4], queue[4];
> + struct cn10k_eth_txq *txq;
> + uint16_t i, j;
> + uintptr_t pa;
> +
> + for (i = 0; i < nb_mbufs; i += 4) {
> + port[0] = mbufs[i]->port;
> + port[1] = mbufs[i + 1]->port;
> + port[2] = mbufs[i + 2]->port;
> + port[3] = mbufs[i + 3]->port;
> +
> + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
> + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
> + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
> + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
> +
> + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
> + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
> +
> + for (j = 0; j < 4; j++) {
> + struct rte_mbuf *m = mbufs[i + j];
> +
> + txq = (struct cn10k_eth_txq *)
> + txq_data[port[j]][queue[j]];
> + cn10k_nix_tx_skeleton(txq, cmd, flags);
> + /* Perform header writes before barrier
> + * for TSO
> + */
> + if (flags & NIX_TX_OFFLOAD_TSO_F)
> + cn10k_nix_xmit_prepare_tso(m, flags);
> +
> + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
> + txq->lso_tun_fmt);
> + if (flags & NIX_TX_MULTI_SEG_F) {
> + const uint16_t segdw =
> + cn10k_nix_prepare_mseg(
> + m, (uint64_t *)lmt_addr,
> + flags);
> + pa = txq->io_addr | ((segdw - 1) << 4);
> + } else {
> + pa = txq->io_addr |
> + (cn10k_nix_tx_ext_subs(flags) + 1)
> + << 4;
> + }
> + if (!sched_type)
> + roc_sso_hws_head_wait(base +
> + SSOW_LF_GWS_TAG);
> +
> + roc_lmt_submit_steorl(lmt_id, pa);
> + }
> + } else {
> + txq = (struct cn10k_eth_txq *)
> + txq_data[port[0]][queue[0]];
> + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
> + + SSOW_LF_GWS_TAG,
> + flags | NIX_TX_VWQE_F);
> + }
> + }
> +}
> +
> static __rte_always_inline uint16_t
> cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> uint64_t *cmd,
> const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> const uint32_t flags)
> {
> - const struct cn10k_eth_txq *txq;
> - struct rte_mbuf *m = ev->mbuf;
> - uint16_t ref_cnt = m->refcnt;
> + struct cn10k_eth_txq *txq;
> + struct rte_mbuf *m;
> uintptr_t lmt_addr;
> + uint16_t ref_cnt;
> uint16_t lmt_id;
> uintptr_t pa;
>
> lmt_addr = ws->lmt_base;
> ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
> +
> + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
> + struct rte_mbuf **mbufs = ev->vec->mbufs;
> + uint64_t meta = *(uint64_t *)ev->vec;
> +
> + if (meta & BIT(31)) {
> + txq = (struct cn10k_eth_txq *)
> + txq_data[meta >> 32][meta >> 48];
> +
> + cn10k_nix_xmit_pkts_vector(
> + txq, mbufs, meta & 0xFFFF, cmd,
> + ws->tx_base + SSOW_LF_GWS_TAG,
> + flags | NIX_TX_VWQE_F);
> + } else {
> + cn10k_sso_vwqe_split_tx(
> + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
> + ev->sched_type, ws->tx_base, txq_data, flags);
> + }
> + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
> + return (meta & 0xFFFF);
> + }
> +
> + m = ev->mbuf;
> + ref_cnt = m->refcnt;
> txq = cn10k_sso_hws_xtract_meta(m, txq_data);
> cn10k_nix_tx_skeleton(txq, cmd, flags);
> /* Perform header writes before barrier for TSO */
> @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
> }
> if (!ev->sched_type)
> - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
>
> roc_lmt_submit_steorl(lmt_id, pa);
>
> @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
>
> cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
> ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
> -
> return 1;
> }
>
> diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
> index 3f9751211a..cc1e141957 100644
> --- a/drivers/event/cnxk/cn9k_worker.h
> +++ b/drivers/event/cnxk/cn9k_worker.h
> @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
> @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> } else {
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_one(cmd, txq->lmt_addr,
> diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
> index 7891b749df..9f9ceab8a1 100644
> --- a/drivers/event/cnxk/cnxk_worker.h
> +++ b/drivers/event/cnxk/cnxk_worker.h
> @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
> #endif
> }
>
> -static __rte_always_inline void
> -cnxk_sso_hws_head_wait(uintptr_t tag_op)
> -{
> -#ifdef RTE_ARCH_ARM64
> - uint64_t tag;
> -
> - asm volatile(" ldr %[tag], [%[tag_op]] \n"
> - " tbnz %[tag], 35, done%= \n"
> - " sevl \n"
> - "rty%=: wfe \n"
> - " ldr %[tag], [%[tag_op]] \n"
> - " tbz %[tag], 35, rty%= \n"
> - "done%=: \n"
> - : [tag] "=&r"(tag)
> - : [tag_op] "r"(tag_op));
> -#else
> - /* Wait for the HEAD to be set */
> - while (!(plt_read64(tag_op) & BIT_ULL(35)))
> - ;
> -#endif
> -}
> -
> #endif
> diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
> index 1f30bab59a..0e1276c60b 100644
> --- a/drivers/net/cnxk/cn10k_tx.c
> +++ b/drivers/net/cnxk/cn10k_tx.c
> @@ -16,7 +16,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
> - flags); \
> + 0, flags); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index eb148b8e77..f75cae07ae 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -18,6 +18,7 @@
> * Defining it from backwards to denote its been
> * not used as offload flags to pick function
> */
> +#define NIX_TX_VWQE_F BIT(14)
> #define NIX_TX_MULTI_SEG_F BIT(15)
>
> #define NIX_TX_NEED_SEND_HDR_W1 \
> @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> - uint64_t *cmd, const uint16_t flags)
> + uint64_t *cmd, uintptr_t base, const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> const rte_iova_t io_addr = txq->io_addr;
> @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> uint64_t lso_tun_fmt;
> uint64_t data;
>
> - NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + if (!(flags & NIX_TX_VWQE_F)) {
> + NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + /* Reduce the cached count */
> + txq->fc_cache_pkts -= pkts;
> + }
>
> /* Get cmd skeleton */
> cn10k_nix_tx_skeleton(txq, cmd, flags);
>
> - /* Reduce the cached count */
> - txq->fc_cache_pkts -= pkts;
> -
> if (flags & NIX_TX_OFFLOAD_TSO_F)
> lso_tun_fmt = txq->lso_tun_fmt;
>
> @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
> }
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> /* Trigger LMTST */
> if (burst > 16) {
> data = cn10k_nix_tx_steor_data(flags);
> @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
> @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> shft += 3;
> }
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> data0 = (uint64_t)data128;
> data1 = (uint64_t)(data128 >> 64);
> /* Make data0 similar to data1 */
> @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
> uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64_t data[2];
> } wd;
>
> - NIX_XMIT_FC_OR_RETURN(txq, pkts);
> -
> - scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + if (!(flags & NIX_TX_VWQE_F)) {
> + NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + /* Reduce the cached count */
> + txq->fc_cache_pkts -= pkts;
> + } else {
> + scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + }
>
> - /* Reduce the cached count */
> - txq->fc_cache_pkts -= pkts;
> /* Perform header writes before barrier for TSO */
> if (flags & NIX_TX_OFFLOAD_TSO_F) {
> for (i = 0; i < pkts; i++)
> @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (flags & NIX_TX_MULTI_SEG_F)
> wd.data[0] >>= 16;
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> /* Trigger LMTST */
> if (lnum > 16) {
> if (!(flags & NIX_TX_MULTI_SEG_F))
> @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (unlikely(scalar)) {
> if (flags & NIX_TX_MULTI_SEG_F)
> pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
> - scalar, cmd, flags);
> + scalar, cmd, base,
> + flags);
> else
> pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
> - cmd, flags);
> + cmd, base, flags);
> }
>
> return pkts;
> @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> #else
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> RTE_SET_USED(tx_queue);
> RTE_SET_USED(tx_pkts);
> RTE_SET_USED(pkts);
> RTE_SET_USED(cmd);
> RTE_SET_USED(flags);
> + RTE_SET_USED(base);
> return 0;
> }
> #endif
> diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
> index 33f6754722..4ea4c8a4e5 100644
> --- a/drivers/net/cnxk/cn10k_tx_mseg.c
> +++ b/drivers/net/cnxk/cn10k_tx_mseg.c
> @@ -18,7 +18,8 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
> - (flags) | NIX_TX_MULTI_SEG_F); \
> + 0, (flags) \
> + | NIX_TX_MULTI_SEG_F); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
> index 34e3737501..a0350496ab 100644
> --- a/drivers/net/cnxk/cn10k_tx_vec.c
> +++ b/drivers/net/cnxk/cn10k_tx_vec.c
> @@ -18,7 +18,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
> - (flags)); \
> + 0, (flags)); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> index 1fad81dbad..7f98f79b97 100644
> --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> @@ -16,7 +16,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_vector( \
> - tx_queue, tx_pkts, pkts, cmd, \
> + tx_queue, tx_pkts, pkts, cmd, 0, \
> (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> --
> 2.17.1
>
* [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (5 preceding siblings ...)
2021-07-11 23:29 ` [dpdk-dev] [PATCH v8 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
` (5 more replies)
6 siblings, 6 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Ray Kinsella
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter.
Resize cn10k workslot fastpath structure to fit in 64B cacheline size.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
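For context, a minimal usage sketch (not part of this patch) of how an
application binds an ethdev Rx queue to the event Rx adapter so that the
internal-port queue add/del ops added here are exercised; the adapter,
event device and ethdev IDs and the event queue settings below are
placeholders:

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

/* Illustrative only: create an Rx adapter, bind all Rx queues of one
 * ethdev to event queue 0, then start the adapter.
 */
static int
bind_rxq_to_adapter(uint8_t adptr_id, uint8_t evdev_id, uint16_t ethdev_id)
{
	struct rte_event_port_conf port_conf;
	struct rte_event_eth_rx_adapter_queue_conf qconf = {
		.ev = {
			.queue_id = 0,	/* target event queue */
			.sched_type = RTE_SCHED_TYPE_ATOMIC,
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
		},
	};
	int rc;

	rc = rte_event_port_default_conf_get(evdev_id, 0, &port_conf);
	if (rc)
		return rc;
	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, &port_conf);
	if (rc)
		return rc;
	/* -1 binds every Rx queue of the ethdev; a specific id works too. */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, ethdev_id, -1, &qconf);
	if (rc)
		return rc;
	return rte_event_eth_rx_adapter_start(adptr_id);
}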
---
v9 Changes:
- Fix doc compilation.
- Remove rte prefix used in base code.
v8 Changes:
- Fix incorrect cq_w1 offset.
- Move doc changes to 1st patch.
v7 Changes:
- Set correct limits for SQB aura.
v6 Changes:
- More code cleanup.
- Fix incorrect SQB configuration and missing fc check.
v5 Changes:
- Use cnxk_eth_rxq_to_sp instead of manually calculating sp offset.
v4 Changes:
- Split patches for easier merge.
v3 Changes:
- Spell check.
doc/guides/eventdevs/cnxk.rst | 33 +++++
doc/guides/rel_notes/release_21_08.rst | 6 +
drivers/common/cnxk/roc_nix.h | 3 +
drivers/common/cnxk/roc_nix_fc.c | 78 +++++++++++
drivers/common/cnxk/roc_nix_priv.h | 3 +-
drivers/common/cnxk/version.map | 1 +
drivers/event/cnxk/cn10k_eventdev.c | 107 ++++++++++++---
drivers/event/cnxk/cn10k_worker.c | 7 +-
drivers/event/cnxk/cn10k_worker.h | 32 +++--
drivers/event/cnxk/cn9k_eventdev.c | 89 +++++++++++++
drivers/event/cnxk/cn9k_worker.h | 4 +
drivers/event/cnxk/cnxk_eventdev.c | 2 +
drivers/event/cnxk/cnxk_eventdev.h | 43 ++++--
drivers/event/cnxk/cnxk_eventdev_adptr.c | 158 +++++++++++++++++++++++
drivers/event/cnxk/meson.build | 9 +-
15 files changed, 528 insertions(+), 47 deletions(-)
diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800cc..53560d3830 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,15 @@ Features of the OCTEON cnxk SSO PMD are:
time granularity of 2.5us on CN9K and 1us on CN10K.
- Up to 256 TIM rings a.k.a event timer adapters.
- Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+ RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+ capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+ eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
Prerequisites and Compilation procedure
---------------------------------------
@@ -93,6 +102,15 @@ Runtime Config Options
-a 0002:0e:00.0,qos=[1-50-50-50]
+- ``Force Rx Back pressure``
+
+ Force Rx back pressure when same mempool is used across ethernet device
+ connected to event device.
+
+ For example::
+
+ -a 0002:0e:00.0,force_rx_bp=1
+
- ``TIM disable NPA``
By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +178,18 @@ Debugging Options
+---+------------+-------------------------------------------------------+
| 2 | TIM | --log-level='pmd\.event\.cnxk\.timer,8' |
+---+------------+-------------------------------------------------------+
+
+Limitations
+-----------
+
+Rx adapter support
+~~~~~~~~~~~~~~~~~~
+
+Using the same mempool for all the ethernet device ports connected to
+event device would cause back pressure to be asserted only on the first
+ethernet device.
+Back pressure is automatically disabled when the same mempool is used for all
+the ethernet devices connected to the event device; to override this,
+applications can use the `force_rx_bp=1` device argument.
+Using unique mempool per each ethernet device is recommended when they are
+connected to event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 6a902ef9ac..d40533ab44 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -117,6 +117,12 @@ New Features
The experimental PMD power management API now supports managing
multiple Ethernet Rx queues per lcore.
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
+
+ * Added Rx/Tx adapter support for event/cnxk when the ethernet device requested
+ is net/cnxk.
+ * Add support for event vectorization for Rx/Tx adapter.
+
Removed Items
-------------
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb69027956..76613fe84e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,
enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);
+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+ uint8_t ena, uint8_t force);
+
/* NPC */
int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f8..f17eba4169 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum roc_nix_fc_mode mode)
exit:
return rc;
}
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+ struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+ struct npa_lf *lf = idev_npa_obj_get();
+ struct npa_aq_enq_req *req;
+ struct npa_aq_enq_rsp *rsp;
+ struct mbox *mbox;
+ uint32_t limit;
+ int rc;
+
+ if (roc_nix_is_sdp(roc_nix))
+ return;
+
+ if (!lf)
+ return;
+ mbox = lf->mbox;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_READ;
+
+ rc = mbox_process_msg(mbox, (void *)&rsp);
+ if (rc)
+ return;
+
+ limit = rsp->aura.limit;
+ /* BP is already enabled. */
+ if (rsp->aura.bp_ena) {
+ /* If BP ids don't match disable BP. */
+ if ((rsp->aura.nix0_bpid != nix->bpid[0]) && !force) {
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ req->aura.bp_ena = 0;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+ }
+ return;
+ }
+
+ /* BP was previously enabled but now disabled skip. */
+ if (rsp->aura.bp)
+ return;
+
+ req = mbox_alloc_msg_npa_aq_enq(mbox);
+ if (req == NULL)
+ return;
+
+ req->aura_id = roc_npa_aura_handle_to_aura(pool_id);
+ req->ctype = NPA_AQ_CTYPE_AURA;
+ req->op = NPA_AQ_INSTOP_WRITE;
+
+ if (ena) {
+ req->aura.nix0_bpid = nix->bpid[0];
+ req->aura_mask.nix0_bpid = ~(req->aura_mask.nix0_bpid);
+ req->aura.bp = NIX_RQ_AURA_THRESH(
+ limit > 128 ? 256 : limit); /* 95% of size*/
+ req->aura_mask.bp = ~(req->aura_mask.bp);
+ }
+
+ req->aura.bp_ena = !!ena;
+ req->aura_mask.bp_ena = ~(req->aura_mask.bp_ena);
+
+ mbox_process(mbox);
+}
diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index d9c32df442..9dc0c88a6f 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -16,7 +16,8 @@
#define NIX_SQB_LOWER_THRESH ((uint16_t)70)
/* Apply BP/DROP when CQ is 95% full */
-#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_CQ_THRESH_LEVEL (5 * 256 / 100)
+#define NIX_RQ_AURA_THRESH(x) (((x) * 95) / 100)
/* IRQ triggered when NIX_LF_CINTX_CNT[QCOUNT] crosses this value */
#define CQ_CQE_THRESH_DEFAULT 0x1ULL
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index e3af48c02e..8ea3e9f439 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -83,6 +83,7 @@ INTERNAL {
roc_nix_fc_config_set;
roc_nix_fc_mode_set;
roc_nix_fc_mode_get;
+ rox_nix_fc_npa_bp_cfg;
roc_nix_get_base_chan;
roc_nix_get_pf;
roc_nix_get_pf_func;
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index bf4052c76c..2060c8fe84 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -6,18 +6,6 @@
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-static void
-cn10k_init_hws_ops(struct cn10k_sso_hws *ws, uintptr_t base)
-{
- ws->tag_wqe_op = base + SSOW_LF_GWS_WQE0;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->updt_wqe_op = base + SSOW_LF_GWS_OP_UPD_WQP_GRP1;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_untag_op = base + SSOW_LF_GWS_OP_SWTAG_UNTAG;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static uint32_t
cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
{
@@ -56,7 +44,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn10k_init_hws_ops(ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -135,13 +122,14 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
cq_ds_cnt &= 0x3FFF3FFF0000;
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, ws->getwrk_op);
+ plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
cn10k_sso_hws_get_work_empty(ws, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op,
- ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -205,9 +193,11 @@ cn10k_sso_hws_reset(void *arg, void *hws)
if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
SSO_TT_EMPTY) {
- plt_write64(BIT_ULL(16) | 1, ws->getwrk_op);
+ plt_write64(BIT_ULL(16) | 1,
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
@@ -407,6 +397,80 @@ cn10k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn10k));
}
+static int
+cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+}
+
+static int
+cn10k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn10k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn10k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -420,6 +484,12 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.port_unlink = cn10k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn10k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn10k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn10k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -502,6 +572,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn10k, cn10k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN10K_SSO_GW_MODE "=<int>"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c64..5dbae275ba 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,7 +18,8 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_wqe_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -69,7 +70,7 @@ cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return 1;
}
@@ -94,7 +95,7 @@ cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
if (ws->swtag_req) {
ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
return ret;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 2f093a8dd5..c7250bf9e7 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
@@ -31,7 +35,8 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(ws->tag_wqe_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -43,9 +48,11 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(ws->swtag_untag_op);
+ cnxk_sso_hws_swtag_untag(ws->base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, ws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
ws->swtag_req = 1;
}
@@ -57,8 +64,9 @@ cn10k_sso_hws_fwd_group(struct cn10k_sso_hws *ws, const struct rte_event *ev,
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, ws->updt_wqe_op);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -68,7 +76,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_wqe_op)) == grp)
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -93,12 +101,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
: [wdata] "+r"(gw.get_work)
- : [gw_loc] "r"(ws->getwrk_op)
+ : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
- plt_write64(gw.u64[0], ws->getwrk_op);
+ plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
@@ -130,11 +139,12 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
- : [tag_loc] "r"(ws->tag_wqe_op)
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
do {
- roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
+ roc_load_pair(gw.u64[0], gw.u64[1],
+ ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
#endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 0684417eab..072800c243 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -481,6 +481,88 @@ cn9k_sso_selftest(void)
return cnxk_sso_selftest(RTE_STR(event_cn9k));
}
+static int
+cn9k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int rc;
+
+ RTE_SET_USED(event_dev);
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 9);
+ if (rc)
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+ else
+ *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+ return 0;
+}
+
+static void
+cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem,
+ void *tstmp_info)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ dws->lookup_mem = lookup_mem;
+ dws->tstamp = tstmp_info;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ ws->lookup_mem = lookup_mem;
+ ws->tstamp = tstmp_info;
+ }
+ }
+}
+
+static int
+cn9k_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cn9k_eth_rxq *rxq;
+ void *lookup_mem;
+ void *tstmp_info;
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id,
+ queue_conf);
+ if (rc)
+ return -EINVAL;
+
+ rxq = eth_dev->data->rx_queues[0];
+ lookup_mem = rxq->lookup_mem;
+ tstmp_info = rxq->tstamp;
+ cn9k_sso_set_priv_mem(event_dev, lookup_mem, tstmp_info);
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ int rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (rc)
+ return -EINVAL;
+
+ return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -494,6 +576,12 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.port_unlink = cn9k_sso_port_unlink,
.timeout_ticks = cnxk_sso_timeout_ticks,
+ .eth_rx_adapter_caps_get = cn9k_sso_rx_adapter_caps_get,
+ .eth_rx_adapter_queue_add = cn9k_sso_rx_adapter_queue_add,
+ .eth_rx_adapter_queue_del = cn9k_sso_rx_adapter_queue_del,
+ .eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+ .eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
@@ -571,6 +659,7 @@ RTE_PMD_REGISTER_PCI_TABLE(event_cn9k, cn9k_pci_sso_map);
RTE_PMD_REGISTER_KMOD_DEP(event_cn9k, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(event_cn9k, CNXK_SSO_XAE_CNT "=<int>"
CNXK_SSO_GGRP_QOS "=<string>"
+ CNXK_SSO_FORCE_BP "=1"
CN9K_SSO_SINGLE_WS "=1"
CNXK_TIM_DISABLE_NPA "=1"
CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 38fca08fb6..f5a4401465 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -5,9 +5,13 @@
#ifndef __CN9K_WORKER_H__
#define __CN9K_WORKER_H__
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
/* SSO Operations */
static __rte_always_inline uint8_t
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 7189ee3a79..cfd7fb971c 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -571,6 +571,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
&dev->xae_cnt);
rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict,
dev);
+ rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_value,
+ &dev->force_ena_bp);
rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_value,
&single_ws);
rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
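Note: the CNXK_SSO_FORCE_BP ("force_rx_bp") devarg parsed above forces NIX-to-SSO backpressure to be configured when Rx queues are added to the adapter (dev->force_ena_bp is later handed to the NPA backpressure setup in cnxk_sso_rx_adapter_queue_add()). As with the other SSO devargs it is supplied against the SSO PCI device on the EAL command line, for example "-a 0002:0e:00.0,force_rx_bp=1", where the PCI address is a placeholder.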
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 668e51d62a..b65d725f55 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -6,6 +6,8 @@
#define __CNXK_EVENTDEV_H__
#include <rte_devargs.h>
+#include <rte_ethdev.h>
+#include <rte_event_eth_rx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -18,6 +20,7 @@
#define CNXK_SSO_XAE_CNT "xae_cnt"
#define CNXK_SSO_GGRP_QOS "qos"
+#define CNXK_SSO_FORCE_BP "force_rx_bp"
#define CN9K_SSO_SINGLE_WS "single_ws"
#define CN10K_SSO_GW_MODE "gw_mode"
@@ -81,7 +84,10 @@ struct cnxk_sso_evdev {
uint64_t nb_xaq_cfg;
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
+ uint64_t rx_offloads;
uint64_t adptr_xae_cnt;
+ uint16_t rx_adptr_pool_cnt;
+ uint64_t *rx_adptr_pools;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -89,25 +95,18 @@ struct cnxk_sso_evdev {
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
struct cnxk_sso_qos *qos_parse_data;
+ uint8_t force_ena_bp;
/* CN9K */
uint8_t dual_ws;
/* CN10K */
uint8_t gw_mode;
} __rte_cache_aligned;
-/* CN10K HWS ops */
-#define CN10K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_untag_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t updt_wqe_op; \
- uintptr_t tag_wqe_op; \
- uintptr_t getwrk_op
-
struct cn10k_sso_hws {
- /* Get Work Fastpath data */
- CN10K_SSO_HWS_OPS;
+ uint64_t base;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint32_t gw_wdata;
uint8_t swtag_req;
uint8_t hws_id;
@@ -115,7 +114,6 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
uintptr_t lmt_base;
} __rte_cache_aligned;
@@ -132,6 +130,9 @@ struct cn10k_sso_hws {
struct cn9k_sso_hws {
/* Get Work Fastpath data */
CN9K_SSO_HWS_OPS;
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t hws_id;
/* Add Work Fastpath data */
@@ -148,6 +149,9 @@ struct cn9k_sso_hws_state {
struct cn9k_sso_hws_dual {
/* Get Work Fastpath data */
struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct cnxk_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
uint8_t hws_id;
@@ -250,4 +254,17 @@ int cnxk_sso_xstats_reset(struct rte_eventdev *event_dev,
/* CN9K */
void cn9k_sso_set_rsrc(void *arg);
+/* Common adapter ops */
+int cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+int cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev);
+
#endif /* __CNXK_EVENTDEV_H__ */
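The header change above drops the cached per-operation addresses (the old CN10K_SSO_HWS_OPS block) in favour of a single LF base: fast-path code now derives each operation address from ws->base plus a fixed SSOW_LF_GWS_* register offset, which frees room in the first cache line for the new Rx-adapter fields (lookup_mem, tstamp). A minimal sketch of the addressing pattern follows; the helper name is hypothetical and the driver simply open-codes the addition at each call site.

#include <stdint.h>

/* Illustrative only: not part of the driver. */
static inline uint64_t
hws_gws_addr(uint64_t lf_base, uint64_t reg_off)
{
	return lf_base + reg_off; /* reg_off: e.g. SSOW_LF_GWS_WQE0 */
}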
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 89a1d82c14..3b7ecb375a 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
void
@@ -11,6 +12,32 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
int i;
switch (event_type) {
+ case RTE_EVENT_TYPE_ETHDEV: {
+ struct cnxk_eth_rxq_sp *rxq = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->rx_adptr_pool_cnt; i++) {
+ if ((uint64_t)rxq->qconf.mp == dev->rx_adptr_pools[i])
+ return;
+ }
+
+ dev->rx_adptr_pool_cnt++;
+ old_ptr = dev->rx_adptr_pools;
+ dev->rx_adptr_pools = rte_realloc(
+ dev->rx_adptr_pools,
+ sizeof(uint64_t) * dev->rx_adptr_pool_cnt, 0);
+ if (dev->rx_adptr_pools == NULL) {
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ dev->rx_adptr_pools = old_ptr;
+ dev->rx_adptr_pool_cnt--;
+ return;
+ }
+ dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] =
+ (uint64_t)rxq->qconf.mp;
+
+ dev->adptr_xae_cnt += rxq->qconf.mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
@@ -65,3 +92,134 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
break;
}
}
+
+static int
+cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
+ uint16_t port_id, const struct rte_event *ev,
+ uint8_t custom_flowid)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 1;
+ rq->tt = ev->sched_type;
+ rq->hwgrp = ev->queue_id;
+ rq->flow_tag_width = 20;
+ rq->wqe_skip = 1;
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |= (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4))
+ << 24;
+
+ if (custom_flowid) {
+ rq->flow_tag_width = 0;
+ rq->tag_mask |= ev->flow_id;
+ }
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+ rq->sso_ena = 0;
+ rq->flow_tag_width = 32;
+ rq->tag_mask = 0;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+int
+cnxk_sso_rx_adapter_queue_add(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t port = eth_dev->data->port_id;
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ rc |= cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev,
+ i, queue_conf);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc |= cnxk_sso_rxq_enable(
+ cnxk_eth_dev, (uint16_t)rx_queue_id, port,
+ &queue_conf->ev,
+ !!(queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID));
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, true,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0) {
+ plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+ queue_conf->ev.queue_id);
+ return rc;
+ }
+
+ dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct cnxk_eth_rxq_sp *rxq_sp;
+ int i, rc = 0;
+
+ RTE_SET_USED(event_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ rxq_sp = cnxk_eth_rxq_to_sp(
+ eth_dev->data->rx_queues[rx_queue_id]);
+ rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+ rox_nix_fc_npa_bp_cfg(&cnxk_eth_dev->nix,
+ rxq_sp->qconf.mp->pool_id, false,
+ dev->force_ena_bp);
+ }
+
+ if (rc < 0)
+ plt_err("Failed to clear Rx adapter config port=%d, q=%d",
+ eth_dev->data->port_id, rx_queue_id);
+
+ return rc;
+}
+
+int
+cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
+
+int
+cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev)
+{
+ RTE_SET_USED(event_dev);
+ RTE_SET_USED(eth_dev);
+
+ return 0;
+}
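For clarity, cnxk_sso_rxq_enable() above programs a 20-bit flow tag and packs the originating ethdev port plus RTE_EVENT_TYPE_ETHDEV into the upper bits of the RQ tag, which is what later lets the dequeue side identify ethdev events and recover the port (the worker code does this on the reformatted event word with its CNXK_*_FROM_TAG helpers). A sketch of the layout implied by the shifts above; the helper below is illustrative and not taken from the driver headers.

#include <stdint.h>

/*
 * Tag layout implied by the tag_mask computation above:
 *   [31:28] event type (RTE_EVENT_TYPE_ETHDEV)
 *   [27:24] ethdev port id bits [7:4]
 *   [23:20] ethdev port id bits [3:0]
 *   [19:0]  flow tag (flow_tag_width = 20)
 */
static inline uint8_t
sso_tag_to_port(uint32_t tag)
{
	return (uint8_t)(((tag >> 20) & 0xF) | (((tag >> 24) & 0xF) << 4));
}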
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a9..eda562f5b5 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -21,4 +21,11 @@ sources = files(
'cnxk_tim_worker.c',
)
-deps += ['bus_pci', 'common_cnxk']
+extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk']
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
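For context, the queue add/del hooks registered by this patch sit behind the generic eventdev Rx adapter API; a minimal application-side sketch of binding an ethdev to the SSO through that API is shown below. Adapter, device and port identifiers as well as the event fields are placeholders, and error handling is trimmed.

#include <string.h>

#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>

static int
bind_port_to_sso(uint8_t adptr_id, uint8_t evdev_id, uint16_t eth_port_id)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	struct rte_event_port_conf pconf;
	int rc;

	rc = rte_event_port_default_conf_get(evdev_id, 0, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_rx_adapter_create(adptr_id, evdev_id, &pconf);
	if (rc)
		return rc;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = 0;                       /* target event queue/HWGRP */
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.event_type = RTE_EVENT_TYPE_ETHDEV;

	/* rx_queue_id of -1 binds every Rx queue of the port; with the
	 * INTERNAL_PORT capability this ends up in the
	 * cnxk_sso_rx_adapter_queue_add() hook added above. */
	rc = rte_event_eth_rx_adapter_queue_add(adptr_id, eth_port_id, -1,
						&qconf);
	if (rc)
		return rc;

	return rte_event_eth_rx_adapter_start(adptr_id);
}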
* [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-20 11:03 ` David Marchand
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 3/7] event/cnxk: add Tx adapter support pbhagavatula
` (4 subsequent siblings)
5 siblings, 1 reply; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Rx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 136 +++++++-
drivers/event/cnxk/cn10k_worker.c | 54 ----
drivers/event/cnxk/cn10k_worker.h | 97 +++++-
drivers/event/cnxk/cn10k_worker_deq.c | 44 +++
drivers/event/cnxk/cn10k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn10k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_eventdev.c | 305 +++++++++++++++++-
drivers/event/cnxk/cn9k_worker.c | 117 -------
drivers/event/cnxk/cn9k_worker.h | 174 ++++++++--
drivers/event/cnxk/cn9k_worker_deq.c | 44 +++
drivers/event/cnxk/cn9k_worker_deq_burst.c | 29 ++
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 72 +++++
drivers/event/cnxk/cn9k_worker_dual_deq.c | 53 +++
.../event/cnxk/cn9k_worker_dual_deq_burst.c | 30 ++
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 89 +++++
drivers/event/cnxk/cnxk_eventdev.h | 1 +
drivers/event/cnxk/meson.build | 9 +
17 files changed, 1124 insertions(+), 231 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe84..ba7d95fff7 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn10k_sso_hws_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
- if (dev->is_timeout_deq) {
- event_dev->dequeue = cn10k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_tmo_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
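The dequeue handler above is now picked from a multi-dimensional array indexed by the boolean presence of each NIX Rx offload flag, with the array contents generated by expanding NIX_RX_FASTPATH_MODES, so every offload combination gets a dedicated, constant-folded routine. A reduced two-flag sketch of the selection pattern, with made-up flag and function names:

#include <stdint.h>

#define RSS_F   (1u << 0)
#define CKSUM_F (1u << 1)

typedef uint16_t (*deq_fn_t)(void *port);

static uint16_t deq_plain(void *port)     { (void)port; return 0; }
static uint16_t deq_rss(void *port)       { (void)port; return 0; }
static uint16_t deq_cksum(void *port)     { (void)port; return 0; }
static uint16_t deq_rss_cksum(void *port) { (void)port; return 0; }

static deq_fn_t
pick_deq(uint64_t offloads)
{
	/* One array dimension per offload flag, mirroring the scheme above. */
	static const deq_fn_t fns[2][2] = {
		[0][0] = deq_plain, [0][1] = deq_rss,
		[1][0] = deq_cksum, [1][1] = deq_rss_cksum,
	};

	return fns[!!(offloads & CKSUM_F)][!!(offloads & RSS_F)];
}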
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 5dbae275ba..c71aa37327 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,57 +60,3 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return 1;
- }
-
- return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn10k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
- return ret;
- }
-
- ret = cn10k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn10k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index c7250bf9e7..b724083caa 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -87,20 +87,37 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
cn10k_sso_hws_fwd_group(ws, ev, grp);
}
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, void *lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
gw.get_work = ws->gw_wdata;
#if defined(RTE_ARCH_ARM64) && !defined(__clang__)
asm volatile(
PLT_CPU_FEATURE_PREAMBLE
"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
- : [wdata] "+r"(gw.get_work)
+ "sub %[mbuf], %H[wdata], #0x80 \n"
+ : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
: [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
: "memory");
#else
@@ -109,11 +126,34 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -128,6 +168,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -138,7 +179,9 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
" ldp %[tag], %[wqp], [%[tag_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
: "memory");
#else
@@ -146,12 +189,25 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
roc_load_pair(gw.u64[0], gw.u64[1],
ws->base + SSOW_LF_GWS_WQE0);
} while (gw.u64[0] & BIT_ULL(63));
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -170,16 +226,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
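The R() macro above is expanded twice: once here against NIX_RX_FASTPATH_MODES to declare one prototype per offload combination, and again in the new cn10k_worker_deq*.c files to emit the bodies, spreading the instantiations across compilation units. A self-contained sketch of the scheme with an illustrative two-entry table (the real table and flag names live in cn10k_rx.h):

#include <stdint.h>

#define RSS_F (1u << 0)

static uint16_t get_work(void *port, uint32_t flags)
{
	(void)port;
	(void)flags; /* constant-folded per instantiation */
	return 0;
}

#define FASTPATH_MODES \
	R(no_offload, 0) \
	R(rss, RSS_F)

/* Header side: declarations. */
#define R(name, flags) static uint16_t deq_##name(void *port);
FASTPATH_MODES
#undef R

/* Source side: definitions specialised on `flags`. */
#define R(name, flags) \
	static uint16_t deq_##name(void *port) \
	{ \
		return get_work(port, (flags)); \
	}
FASTPATH_MODES
#undef R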
diff --git a/drivers/event/cnxk/cn10k_worker_deq.c b/drivers/event/cnxk/cn10k_worker_deq.c
new file mode 100644
index 0000000000..36ec454ccc
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return 1; \
+ } \
+ \
+ return cn10k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_burst.c b/drivers/event/cnxk/cn10k_worker_deq_burst.c
new file mode 100644
index 0000000000..29ecc551cf
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker_deq_tmo.c b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
new file mode 100644
index 0000000000..c8524a27bd
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ return ret; \
+ } \
+ \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn10k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn10k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn10k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 072800c243..e386cb784a 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,202 @@ static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ /* Single WS modes */
+ const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ /* Dual WS modes */
+ const event_dequeue_t sso_hws_dual_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_t sso_hws_dual_deq_tmo_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
+
+ const event_dequeue_burst_t
+ sso_hws_dual_deq_tmo_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_tmo_seg_burst_##name,
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
- event_dev->dequeue = cn9k_sso_hws_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_tmo_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_deq_seg
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_seg_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_deq
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_burst
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ }
}
if (dev->dual_ws) {
@@ -272,14 +457,110 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
- event_dev->dequeue = cn9k_sso_hws_dual_deq;
- event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
- if (dev->deq_tmo_ns) {
- event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
- event_dev->dequeue_burst =
- cn9k_sso_hws_dual_tmo_deq_burst;
+ if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+ event_dev->dequeue = sso_hws_dual_deq_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo_seg
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_seg_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
+ } else {
+ event_dev->dequeue = sso_hws_dual_deq
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst = sso_hws_dual_deq_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+ if (dev->is_timeout_deq) {
+ event_dev->dequeue = sso_hws_dual_deq_tmo
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ event_dev->dequeue_burst =
+ sso_hws_dual_deq_tmo_burst
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_VLAN_STRIP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_CHECKSUM_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_PTYPE_F)]
+ [!!(dev->rx_offloads &
+ NIX_RX_OFFLOAD_RSS_F)];
+ }
}
}
+
+ rte_mb();
}
static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98dd..538bc4b0b3 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,60 +60,6 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
-
- RTE_SET_USED(timeout_ticks);
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return 1;
- }
-
- return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
- uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws *ws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (ws->swtag_req) {
- ws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(ws->tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_get_work(ws, ev);
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
- ret = cn9k_sso_hws_get_work(ws, ev);
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
-
/* Dual ws ops. */
uint16_t __rte_hot
@@ -171,66 +117,3 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
return 1;
}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t gw;
-
- RTE_SET_USED(timeout_ticks);
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return 1;
- }
-
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks)
-{
- struct cn9k_sso_hws_dual *dws = port;
- uint16_t ret = 1;
- uint64_t iter;
-
- if (dws->swtag_req) {
- dws->swtag_req = 0;
- cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
- return ret;
- }
-
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
- ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
- &dws->ws_state[!dws->vws], ev);
- dws->vws = !dws->vws;
- }
-
- return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events, uint64_t timeout_ticks)
-{
- RTE_SET_USED(nb_events);
-
- return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a4401465..c01c00e1da 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,36 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
}
}
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+ const uint32_t tag, const uint32_t flags,
+ const void *const lookup_mem)
+{
+ const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+
+ cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+ (struct rte_mbuf *)mbuf, lookup_mem,
+ mbuf_init | ((uint64_t)port_id) << 48, flags);
+}
+
static __rte_always_inline uint16_t
cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
struct cn9k_sso_hws_state *ws_pair,
- struct rte_event *ev)
+ struct rte_event *ev, const uint32_t flags,
+ const void *const lookup_mem,
+ struct cnxk_timesync_info *const tstamp)
{
const uint64_t set_gw = BIT_ULL(16) | 1;
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
"rty%=: \n"
@@ -147,7 +166,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" tbnz %[tag], 63, rty%= \n"
"done%=: str %[gw], [%[pong]] \n"
" dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
[gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
#else
@@ -156,12 +178,34 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
plt_write64(set_gw, ws_pair->getwrk_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -169,16 +213,22 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+ const uint32_t flags, const void *const lookup_mem)
{
union {
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t tstamp_ptr;
+ uint64_t mbuf;
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
ws->getwrk_op);
+
+ if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+ rte_prefetch_non_temporal(lookup_mem);
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_loc]] \n"
@@ -190,7 +240,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ " prfm pldl1keep, [%[mbuf]] \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +251,35 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, flags,
+ lookup_mem);
+ /* Extracting tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
+ gw.u64[1]) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+ ws->tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -218,6 +294,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
__uint128_t get_work;
uint64_t u64[2];
} gw;
+ uint64_t mbuf;
#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +307,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" ldr %[wqp], [%[wqp_loc]] \n"
" tbnz %[tag], 63, rty%= \n"
"done%=: dmb ld \n"
- : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+ " sub %[mbuf], %[wqp], #0x80 \n"
+ : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+ [mbuf] "=&r"(mbuf)
: [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
#else
gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +317,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
gw.u64[0] = plt_read64(ws->tag_op);
gw.u64[1] = plt_read64(ws->wqp_op);
+ mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
+ if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+ if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+ gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+ cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+ gw.u64[0] & 0xFFFFF, 0, NULL);
+ gw.u64[1] = mbuf;
+ }
+ }
+
ev->event = gw.u64[0];
ev->u64 = gw.u64[1];
@@ -274,28 +366,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
const struct rte_event ev[],
uint16_t nb_events);
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
- uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
- struct rte_event ev[],
- uint16_t nb_events,
- uint64_t timeout_ticks);
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
new file mode 100644
index 0000000000..51ccaf4ec4
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return 1; \
+ } \
+ \
+ return cn9k_sso_hws_get_work( \
+ ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_burst.c b/drivers/event/cnxk/cn9k_worker_deq_burst.c
new file mode 100644
index 0000000000..4e2801459b
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_burst.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
new file mode 100644
index 0000000000..9713d1ef00
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (ws->swtag_req) { \
+ ws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \
+ ret = cn9k_sso_hws_get_work(ws, ev, flags, \
+ ws->lookup_mem); \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
new file mode 100644
index 0000000000..709fa2d9ef
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t gw; \
+ \
+ RTE_SET_USED(timeout_ticks); \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return 1; \
+ } \
+ \
+ gw = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ return gw; \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
new file mode 100644
index 0000000000..d50e1cf83f
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
new file mode 100644
index 0000000000..a0508fdf0d
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_##name(port, ev, \
+ timeout_ticks); \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_##name( \
+ void *port, struct rte_event *ev, uint64_t timeout_ticks) \
+ { \
+ struct cn9k_sso_hws_dual *dws = port; \
+ uint16_t ret = 1; \
+ uint64_t iter; \
+ \
+ if (dws->swtag_req) { \
+ dws->swtag_req = 0; \
+ cnxk_sso_hws_swtag_wait( \
+ dws->ws_state[!dws->vws].tag_op); \
+ return ret; \
+ } \
+ \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
+ ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
+ ret = cn9k_sso_hws_dual_get_work( \
+ &dws->ws_state[dws->vws], \
+ &dws->ws_state[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
+ dws->vws = !dws->vws; \
+ } \
+ \
+ return ret; \
+ } \
+ \
+ uint16_t __rte_hot cn9k_sso_hws_dual_deq_tmo_seg_burst_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events, \
+ uint64_t timeout_ticks) \
+ { \
+ RTE_SET_USED(nb_events); \
+ \
+ return cn9k_sso_hws_dual_deq_tmo_seg_##name(port, ev, \
+ timeout_ticks); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b65d725f55..9d5d2d0339 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -33,6 +33,7 @@
#define CNXK_SSO_MZ_NAME "cnxk_evdev_mz"
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
+#define CNXK_SSO_WQE_SG_PTR (9)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index eda562f5b5..c5c1c0ee8e 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -11,8 +11,17 @@ endif
sources = files(
'cn9k_eventdev.c',
'cn9k_worker.c',
+ 'cn9k_worker_deq.c',
+ 'cn9k_worker_deq_burst.c',
+ 'cn9k_worker_deq_tmo.c',
+ 'cn9k_worker_dual_deq.c',
+ 'cn9k_worker_dual_deq_burst.c',
+ 'cn9k_worker_dual_deq_tmo.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
+ 'cn10k_worker_deq.c',
+ 'cn10k_worker_deq_burst.c',
+ 'cn10k_worker_deq_tmo.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* Re: [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-07-20 11:03 ` David Marchand
2021-07-20 11:43 ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
0 siblings, 1 reply; 93+ messages in thread
From: David Marchand @ 2021-07-20 11:03 UTC (permalink / raw)
To: Pavan Nikhilesh; +Cc: Jerin Jacob Kollanukkaran, Shijith Thotton, dev
On Wed, Jul 14, 2021 at 11:02 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add support for event eth Rx adapter fastpath operations.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
This patch triggers a build issue when cross-compiling for arm64 on my
system with an 8.3 toolchain from Linaro.
I ended up upgrading my toolchain (which solved the issue), but some
users might hit this, so posting for info:
[2813/2834] Compiling C object
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o
FAILED: drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o
aarch64-linux-gnu-gcc -Idrivers/libtmp_rte_event_cnxk.a.p -Idrivers
-I../../dpdk/drivers -Idrivers/event/cnxk
-I../../dpdk/drivers/event/cnxk -Ilib/eventdev
-I../../dpdk/lib/eventdev -I. -I../../dpdk -Iconfig
-I../../dpdk/config -Ilib/eal/include -I../../dpdk/lib/eal/include
-Ilib/eal/linux/include -I../../dpdk/lib/eal/linux/include
-Ilib/eal/arm/include -I../../dpdk/lib/eal/arm/include
-Ilib/eal/common -I../../dpdk/lib/eal/common -Ilib/eal
-I../../dpdk/lib/eal -Ilib/kvargs -I../../dpdk/lib/kvargs
-Ilib/metrics -I../../dpdk/lib/metrics -Ilib/telemetry
-I../../dpdk/lib/telemetry -Ilib/ring -I../../dpdk/lib/ring
-Ilib/ethdev -I../../dpdk/lib/ethdev -Ilib/net -I../../dpdk/lib/net
-Ilib/mbuf -I../../dpdk/lib/mbuf -Ilib/mempool
-I../../dpdk/lib/mempool -Ilib/meter -I../../dpdk/lib/meter -Ilib/hash
-I../../dpdk/lib/hash -Ilib/rcu -I../../dpdk/lib/rcu -Ilib/timer
-I../../dpdk/lib/timer -Ilib/cryptodev -I../../dpdk/lib/cryptodev
-Idrivers/bus/pci -I../../dpdk/drivers/bus/pci
-I../../dpdk/drivers/bus/pci/linux -Ilib/pci -I../../dpdk/lib/pci
-Idrivers/common/cnxk -I../../dpdk/drivers/common/cnxk -Ilib/security
-I../../dpdk/lib/security -Idrivers/net/cnxk
-I../../dpdk/drivers/net/cnxk -Idrivers/bus/vdev
-I../../dpdk/drivers/bus/vdev -Idrivers/mempool/cnxk
-I../../dpdk/drivers/mempool/cnxk -fdiagnostics-color=always -pipe
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -O2 -g -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declarations
-Wmissing-prototypes -Wnested-externs -Wold-style-definition
-Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef
-Wwrite-strings -Wno-packed-not-aligned
-Wno-missing-field-initializers -D_GNU_SOURCE -fPIC -march=armv8-a+crc
-DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -Wno-format-truncation
-flax-vector-conversions -Wno-strict-aliasing
-DRTE_LOG_DEFAULT_LOGTYPE=pmd.event.cnxk -MD -MQ
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o -MF
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o.d -o
drivers/libtmp_rte_event_cnxk.a.p/event_cnxk_cn10k_worker_deq.c.o -c
../../dpdk/drivers/event/cnxk/cn10k_worker_deq.c
{standard input}: Assembler messages:
{standard input}:1392: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:10473: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:15726: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:19146: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x4]'
{standard input}:28825: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:30845: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:34301: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:40152: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x2]'
{standard input}:44998: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x2]'
{standard input}:52457: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:58407: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:62121: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x4]'
{standard input}:64121: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:67572: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:69764: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:88814: Error: reg pair must start from even reg at
operand 1 -- `caspl x27,x28,x27,x28,[x2]'
{standard input}:92747: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x4]'
{standard input}:95490: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:99628: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:102765: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:115148: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:122005: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x4]'
{standard input}:140039: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:147676: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:154953: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:159334: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:162769: Error: reg pair must start from even reg at
operand 1 -- `caspl x17,x18,x17,x18,[x2]'
{standard input}:167453: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x2]'
{standard input}:171071: Error: reg pair must start from even reg at
operand 1 -- `caspl x17,x18,x17,x18,[x2]'
{standard input}:179105: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:186966: Error: reg pair must start from even reg at
operand 1 -- `caspl x23,x24,x23,x24,[x2]'
{standard input}:191653: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:195360: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x3]'
{standard input}:204312: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x3]'
{standard input}:209537: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:222601: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:228793: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:234946: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:240956: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:258235: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:264084: Error: reg pair must start from even reg at
operand 1 -- `caspl x5,x6,x5,x6,[x2]'
{standard input}:270355: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:272988: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:277045: Error: reg pair must start from even reg at
operand 1 -- `caspl x7,x8,x7,x8,[x3]'
{standard input}:279878: Error: reg pair must start from even reg at
operand 1 -- `caspl x21,x22,x21,x22,[x2]'
{standard input}:297340: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:304594: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:315184: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:322794: Error: reg pair must start from even reg at
operand 1 -- `caspl x19,x20,x19,x20,[x2]'
{standard input}:327357: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x2]'
{standard input}:335754: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x2]'
{standard input}:361049: Error: reg pair must start from even reg at
operand 1 -- `caspl x9,x10,x9,x10,[x3]'
{standard input}:364869: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x2]'
{standard input}:370062: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
{standard input}:374066: Error: reg pair must start from even reg at
operand 1 -- `caspl x15,x16,x15,x16,[x2]'
{standard input}:382804: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:391016: Error: reg pair must start from even reg at
operand 1 -- `caspl x3,x4,x3,x4,[x2]'
{standard input}:441361: Error: reg pair must start from even reg at
operand 1 -- `caspl x11,x12,x11,x12,[x3]'
--
David Marchand
^ permalink raw reply [flat|nested] 93+ messages in thread
* Re: [dpdk-dev] [EXT] Re: [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-20 11:03 ` David Marchand
@ 2021-07-20 11:43 ` Pavan Nikhilesh Bhagavatula
2021-07-20 11:50 ` David Marchand
0 siblings, 1 reply; 93+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2021-07-20 11:43 UTC (permalink / raw)
To: David Marchand; +Cc: Jerin Jacob Kollanukkaran, Shijith Thotton, dev
>-----Original Message-----
>From: David Marchand <david.marchand@redhat.com>
>Sent: Tuesday, July 20, 2021 4:33 PM
>To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
>Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Shijith Thotton
><sthotton@marvell.com>; dev <dev@dpdk.org>
>Subject: [EXT] Re: [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx
>adapter fastpath ops
>
>On Wed, Jul 14, 2021 at 11:02 AM <pbhagavatula@marvell.com> wrote:
>>
>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Add support for event eth Rx adapter fastpath operations.
>>
>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
>This patch triggers a build issue when cross-compiling for arm64 on my
>system with an 8.3 toolchain from Linaro.
>I ended up upgrading my toolchain (which solved the issue), but some
>users might hit this, so posting for info:
This is a known compiler bug
https://bugs.dpdk.org/show_bug.cgi?id=697
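For context, the failing caspl is the LSE 128-bit compare-and-swap pair
form: GCC models the 128-bit operand as an X-register pair, and the
affected toolchains can allocate a pair starting at an odd register
(e.g. x23,x24 above), which binutils rejects. A rough, hypothetical
sketch of the pattern (not the exact driver code), assuming armv8.1-a
LSE atomics are available:

/*
 * Hypothetical sketch: compare-and-swap a 128-bit value where the
 * expected and new values are the same pair, mirroring the
 * "caspl xN,xN+1,xN,xN+1,[xM]" form seen in the log above. The %H
 * modifier names the high half of the register pair GCC picks for
 * 'old'.
 */
static inline __uint128_t
cas128_release_same(__uint128_t *loc, __uint128_t val)
{
	__uint128_t old = val;

	asm volatile(".arch armv8.1-a\n"
		     "caspl %[old], %H[old], %[old], %H[old], [%[loc]]\n"
		     : [old] "+r"(old)
		     : [loc] "r"(loc)
		     : "memory");
	return old;
}

Toolchains where GCC constrains such pair operands to start at an
even-numbered register assemble this fine, which matches the
observation that upgrading the toolchain resolved it.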
^ permalink raw reply [flat|nested] 93+ messages in thread
* Re: [dpdk-dev] [EXT] Re: [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops
2021-07-20 11:43 ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
@ 2021-07-20 11:50 ` David Marchand
0 siblings, 0 replies; 93+ messages in thread
From: David Marchand @ 2021-07-20 11:50 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula
Cc: Jerin Jacob Kollanukkaran, Shijith Thotton, dev
On Tue, Jul 20, 2021 at 1:43 PM Pavan Nikhilesh Bhagavatula
<pbhagavatula@marvell.com> wrote:
> >This patch triggers a build issue when cross-compiling for arm64 on my
> >system with an 8.3 toolchain from Linaro.
> >I ended up upgrading my toolchain (which solved the issue), but some
> >users might hit this, so posting for info:
>
> This is a known compiler bug
> https://bugs.dpdk.org/show_bug.cgi?id=697
I see this bz is still open; what is the next step, if any?
--
David Marchand
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v9 3/7] event/cnxk: add Tx adapter support
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
` (3 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
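A minimal application-side usage sketch for the adapter added here
(illustrative only, not part of this patch; it assumes the event device
and the ethdev are already configured and that the default event port
config is acceptable):

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

/* Hypothetical helper: create adapter 0 and bind all Tx queues of a port. */
static int
setup_tx_adapter(uint8_t evdev_id, uint16_t eth_port_id)
{
	struct rte_event_port_conf pconf;
	int rc;

	rc = rte_event_port_default_conf_get(evdev_id, 0, &pconf);
	if (rc)
		return rc;

	rc = rte_event_eth_tx_adapter_create(0, evdev_id, &pconf);
	if (rc)
		return rc;

	/* tx_queue_id of -1 adds every Tx queue of the port. */
	rc = rte_event_eth_tx_adapter_queue_add(0, eth_port_id, -1);
	if (rc)
		return rc;

	return rte_event_eth_tx_adapter_start(0);
}

Since the PMD advertises RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT,
the application can then submit Tx events directly with
rte_event_eth_tx_adapter_enqueue() from its own event port, without a
service core.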
drivers/common/cnxk/roc_nix.h | 1 +
drivers/common/cnxk/roc_nix_queue.c | 8 +-
drivers/event/cnxk/cn10k_eventdev.c | 91 ++++++++++++++
drivers/event/cnxk/cn9k_eventdev.c | 148 +++++++++++++++++++++++
drivers/event/cnxk/cnxk_eventdev.h | 22 +++-
drivers/event/cnxk/cnxk_eventdev_adptr.c | 88 ++++++++++++++
6 files changed, 353 insertions(+), 5 deletions(-)
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 76613fe84e..822c1900e2 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -200,6 +200,7 @@ struct roc_nix_sq {
uint64_t aura_handle;
int16_t nb_sqb_bufs_adj;
uint16_t nb_sqb_bufs;
+ uint16_t aura_sqb_bufs;
plt_iova_t io_addr;
void *lmt_addr;
void *sqe_mem;
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 0604e7a18e..7e2f86eca7 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -587,12 +587,12 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
aura.fc_ena = 1;
aura.fc_addr = (uint64_t)sq->fc;
aura.fc_hyst_bits = 0; /* Store count on all updates */
- rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, nb_sqb_bufs, &aura,
+ rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, NIX_MAX_SQB, &aura,
&pool);
if (rc)
goto fail;
- sq->sqe_mem = plt_zmalloc(blk_sz * nb_sqb_bufs, blk_sz);
+ sq->sqe_mem = plt_zmalloc(blk_sz * NIX_MAX_SQB, blk_sz);
if (sq->sqe_mem == NULL) {
rc = NIX_ERR_NO_MEM;
goto nomem;
@@ -600,11 +600,13 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
/* Fill the initial buffers */
iova = (uint64_t)sq->sqe_mem;
- for (count = 0; count < nb_sqb_bufs; count++) {
+ for (count = 0; count < NIX_MAX_SQB; count++) {
roc_npa_aura_op_free(sq->aura_handle, 0, iova);
iova += blk_sz;
}
roc_npa_aura_op_range_set(sq->aura_handle, (uint64_t)sq->sqe_mem, iova);
+ roc_npa_aura_limit_modify(sq->aura_handle, sq->nb_sqb_bufs);
+ sq->aura_sqb_bufs = NIX_MAX_SQB;
return rc;
nomem:
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff7..8a9b04a3db 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+ ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn10k_sso_hws) +
+ (sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+
+ return 0;
+}
+
static void
cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn10k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ return cn10k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.dev_infos_get = cn10k_sso_info_get,
.dev_configure = cn10k_sso_dev_configure,
@@ -614,6 +701,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index e386cb784a..21f80323d9 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -248,6 +248,66 @@ cn9k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
}
+static int
+cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ int i;
+
+ if (dev->tx_adptr_data == NULL)
+ return 0;
+
+ for (i = 0; i < dev->nb_event_ports; i++) {
+ if (dev->dual_ws) {
+ struct cn9k_sso_hws_dual *dws =
+ event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(dws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ dws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = dws;
+ } else {
+ struct cn9k_sso_hws *ws = event_dev->data->ports[i];
+ void *ws_cookie;
+
+ ws_cookie = cnxk_sso_hws_get_cookie(ws);
+ ws_cookie = rte_realloc_socket(
+ ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie) +
+ sizeof(struct cn9k_sso_hws_dual) +
+ (sizeof(uint64_t) *
+ (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (ws_cookie == NULL)
+ return -ENOMEM;
+ ws = RTE_PTR_ADD(ws_cookie,
+ sizeof(struct cnxk_sso_hws_cookie));
+ memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+ sizeof(uint64_t) * (dev->max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT);
+ event_dev->data->ports[i] = ws;
+ }
+ }
+ rte_mb();
+
+ return 0;
+}
+
static void
cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
{
@@ -734,6 +794,10 @@ cn9k_sso_start(struct rte_eventdev *event_dev)
{
int rc;
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+
rc = cnxk_sso_start(event_dev, cn9k_sso_hws_reset,
cn9k_sso_hws_flush_events);
if (rc < 0)
@@ -844,6 +908,86 @@ cn9k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn9k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn9k", 8);
+ if (ret)
+ *caps = 0;
+ else
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+ return 0;
+}
+
+static void
+cn9k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id,
+ bool ena)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cn9k_eth_txq *txq;
+ struct roc_nix_sq *sq;
+ int i;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cn9k_sso_txq_fc_update(eth_dev, i, ena);
+ } else {
+ uint16_t sq_limit;
+
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq_limit =
+ ena ? RTE_MIN(CNXK_SSO_SQB_LIMIT, sq->aura_sqb_bufs) :
+ sq->nb_sqb_bufs;
+ txq->nb_sqb_bufs_adj =
+ sq_limit -
+ RTE_ALIGN_MUL_CEIL(sq_limit,
+ (1ULL << txq->sqes_per_sqb_log2)) /
+ (1ULL << txq->sqes_per_sqb_log2);
+ txq->nb_sqb_bufs_adj = (70 * txq->nb_sqb_bufs_adj) / 100;
+ }
+}
+
+static int
+cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
+ rc = cn9k_sso_updt_tx_adptr_data(event_dev);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+
+ return 0;
+}
+
+static int
+cn9k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ int rc;
+
+ RTE_SET_USED(id);
+ rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+ if (rc < 0)
+ return rc;
+ cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, false);
+ return cn9k_sso_updt_tx_adptr_data(event_dev);
+}
+
static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.dev_infos_get = cn9k_sso_info_get,
.dev_configure = cn9k_sso_dev_configure,
@@ -863,6 +1007,10 @@ static struct rte_eventdev_ops cn9k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_tx_adapter_caps_get = cn9k_sso_tx_adapter_caps_get,
+ .eth_tx_adapter_queue_add = cn9k_sso_tx_adapter_queue_add,
+ .eth_tx_adapter_queue_del = cn9k_sso_tx_adapter_queue_del,
+
.timer_adapter_caps_get = cnxk_tim_caps_get,
.dump = cnxk_sso_dump,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 9d5d2d0339..24e1be6a97 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -8,6 +8,7 @@
#include <rte_devargs.h>
#include <rte_ethdev.h>
#include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
#include <rte_kvargs.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_pci.h>
@@ -34,6 +35,7 @@
#define CNXK_SSO_XAQ_CACHE_CNT (0x7)
#define CNXK_SSO_XAQ_SLACK (8)
#define CNXK_SSO_WQE_SG_PTR (9)
+#define CNXK_SSO_SQB_LIMIT (0x180)
#define CNXK_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
#define CNXK_TT_FROM_EVENT(x) (((x) >> 38) & SSO_TT_EMPTY)
@@ -86,9 +88,12 @@ struct cnxk_sso_evdev {
rte_iova_t fc_iova;
struct rte_mempool *xaq_pool;
uint64_t rx_offloads;
+ uint64_t tx_offloads;
uint64_t adptr_xae_cnt;
uint16_t rx_adptr_pool_cnt;
uint64_t *rx_adptr_pools;
+ uint64_t *tx_adptr_data;
+ uint16_t max_port_id;
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
@@ -115,7 +120,10 @@ struct cn10k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ /* Tx Fastpath data */
+ uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
/* CN9K HWS ops */
@@ -140,7 +148,9 @@ struct cn9k_sso_hws {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base;
+ /* Tx Fastpath data */
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cn9k_sso_hws_state {
@@ -160,7 +170,9 @@ struct cn9k_sso_hws_dual {
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
- uint64_t base[2];
+ /* Tx Fastpath data */
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
@@ -267,5 +279,11 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
const struct rte_eth_dev *eth_dev);
+int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
+int cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id);
#endif /* __CNXK_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3b7ecb375a..502da272d8 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -223,3 +223,91 @@ cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
return 0;
}
+
+static int
+cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
+{
+ return roc_npa_aura_limit_modify(
+ sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
+}
+
+static int
+cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
+ uint16_t eth_port_id, uint16_t tx_queue_id,
+ void *txq)
+{
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ uint16_t max_port_id = dev->max_port_id;
+ uint64_t *txq_data = dev->tx_adptr_data;
+
+ if (txq_data == NULL || eth_port_id > max_port_id) {
+ max_port_id = RTE_MAX(max_port_id, eth_port_id);
+ txq_data = rte_realloc_socket(
+ txq_data,
+ (sizeof(uint64_t) * (max_port_id + 1) *
+ RTE_MAX_QUEUES_PER_PORT),
+ RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+ if (txq_data == NULL)
+ return -ENOMEM;
+ }
+
+ ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
+ txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
+ dev->max_port_id = max_port_id;
+ dev->tx_adptr_data = txq_data;
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+ struct roc_nix_sq *sq;
+ int i, ret;
+ void *txq;
+
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, i);
+ } else {
+ txq = eth_dev->data->tx_queues[tx_queue_id];
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, CNXK_SSO_SQB_LIMIT);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, txq);
+ if (ret < 0)
+ return ret;
+
+ dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
+ }
+
+ return 0;
+}
+
+int
+cnxk_sso_tx_adapter_queue_del(const struct rte_eventdev *event_dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t tx_queue_id)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+ struct roc_nix_sq *sq;
+ int i, ret;
+
+ RTE_SET_USED(event_dev);
+ if (tx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, i);
+ } else {
+ sq = &cnxk_eth_dev->sqs[tx_queue_id];
+ cnxk_sso_sqb_aura_limit_edit(sq, sq->nb_sqb_bufs);
+ ret = cnxk_sso_updt_tx_queue_data(
+ event_dev, eth_dev->data->port_id, tx_queue_id, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
--
2.17.1
^ permalink raw reply [flat|nested] 93+ messages in thread
* [dpdk-dev] [PATCH v9 4/7] event/cnxk: add Tx adapter fastpath ops
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 2/7] event/cnxk: add Rx adapter fastpath ops pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 3/7] event/cnxk: add Tx adapter support pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
` (2 subsequent siblings)
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add support for event eth Tx adapter fastpath operations.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
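The txa_enqueue tables below follow the same template pattern as the Rx
side: one T() macro expansion emits a specialized enqueue function per
offload-flag combination, and the final pointer is chosen by indexing
the table with !!(tx_offloads & FLAG) per dimension, so each
specialization is compiled with compile-time-constant flags. A
stripped-down sketch of the idea (hypothetical names, not the driver
code):

#include <stdint.h>

/* Stand-in for the real per-burst work; 'flags' is constant per caller. */
static inline uint16_t
my_tx_generic(void *q, void **pkts, uint16_t n, const uint32_t flags)
{
	(void)q;
	(void)pkts;
	(void)flags;
	return n;
}

#define MY_TX_MODES                                                     \
	M(none,      0, 0)                                              \
	M(csum,      0, 1)                                              \
	M(vlan,      1, 0)                                              \
	M(vlan_csum, 1, 1)

/* One specialized function per flag combination. */
#define M(name, f1, f0)                                                 \
	static uint16_t my_tx_##name(void *q, void **pkts, uint16_t n)  \
	{                                                               \
		return my_tx_generic(q, pkts, n, ((f1) << 1) | (f0));   \
	}
MY_TX_MODES
#undef M

/* Lookup table indexed by the boolean of each offload flag. */
static uint16_t (*const my_tx_fns[2][2])(void *, void **, uint16_t) = {
#define M(name, f1, f0) [f1][f0] = my_tx_##name,
	MY_TX_MODES
#undef M
};

/* Selection at configure time would look like:
 *   fn = my_tx_fns[!!(offloads & MY_VLAN_F)][!!(offloads & MY_CSUM_F)];
 */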
drivers/event/cnxk/cn10k_eventdev.c | 38 ++++++++
drivers/event/cnxk/cn10k_worker.h | 67 +++++++++++++
drivers/event/cnxk/cn10k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn10k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_eventdev.c | 81 ++++++++++++++++
drivers/event/cnxk/cn9k_worker.h | 97 +++++++++++++++++++
drivers/event/cnxk/cn9k_worker_dual_tx_enq.c | 23 +++++
.../event/cnxk/cn9k_worker_dual_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq.c | 23 +++++
drivers/event/cnxk/cn9k_worker_tx_enq_seg.c | 23 +++++
drivers/event/cnxk/cnxk_worker.h | 27 +++---
drivers/event/cnxk/meson.build | 6 ++
12 files changed, 440 insertions(+), 14 deletions(-)
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3db..e462f770c5 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
}
static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083caa..3c90c85009 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
+#include "cn10k_tx.h"
/* SSO Operations */
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn10k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+ uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ const struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m = ev->mbuf;
+ uint16_t ref_cnt = m->refcnt;
+ uintptr_t lmt_addr;
+ uint16_t lmt_id;
+ uintptr_t pa;
+
+ lmt_addr = ws->lmt_base;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier for TSO */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+ }
+ if (!ev->sched_type)
+ cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+ ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 0000000000..f9968ac0d0
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn10k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..a24fc42e5a
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn10k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn10k_sso_hws_event_tx( \
+ ws, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 21f80323d9..a69edff195 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
#undef R
};
+ /* Tx modes */
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
+ const event_tx_adapter_enqueue
+ sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+ NIX_TX_FASTPATH_MODES
+#undef T
+ };
+
event_dev->enqueue = cn9k_sso_hws_enq;
event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
+
if (dev->dual_ws) {
event_dev->enqueue = cn9k_sso_hws_dual_enq;
event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+ if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+ /* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+ */
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ } else {
+ event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+ [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+ [!!(dev->tx_offloads &
+ NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+ }
}
+ event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
rte_mb();
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1da..3f9751211a 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
#include "cn9k_ethdev.h"
#include "cn9k_rx.h"
+#include "cn9k_tx.h"
/* SSO Operations */
@@ -416,4 +417,100 @@ NIX_RX_FASTPATH_MODES
NIX_RX_FASTPATH_MODES
#undef R
+static __rte_always_inline void
+cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
+{
+ while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)
+ << (txq)->sqes_per_sqb_log2))
+ ;
+}
+
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+ return (const struct cn9k_eth_txq *)
+ txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+ uint64_t *cmd, const uint32_t flags)
+{
+ roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+ cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ struct rte_mbuf *m = ev->mbuf;
+ const struct cn9k_eth_txq *txq;
+ uint16_t ref_cnt = m->refcnt;
+
+ /* Perform header writes before barrier for TSO */
+ cn9k_nix_xmit_prepare_tso(m, flags);
+	/* Commit any changes to the packet here when fast free is set,
+	 * as no further changes will be made to the mbuf.
+	 * When fast free is not set, both cn9k_nix_prepare_mseg()
+	 * and cn9k_nix_xmit_prepare() have a barrier after the refcnt update.
+	 */
+ if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+ rte_io_wmb();
+ txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+ cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+ txq->io_addr, segdw);
+ } else {
+ cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+ segdw);
+ }
+ } else {
+ if (!CNXK_TT_FROM_EVENT(ev->event)) {
+ cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+ cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ cn9k_sso_txq_fc_wait(txq);
+ if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+ txq->io_addr, flags);
+ } else {
+ cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+ flags);
+ }
+ }
+
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+ if (ref_cnt > 1)
+ return 1;
+ }
+
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+ return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events); \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 0000000000..92e2981f02
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws_dual *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 0000000000..dfb574cf95
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws_dual *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base[!ws->vws], &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 0000000000..3df649c0c8
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ struct cn9k_sso_hws *ws = port; \
+ uint64_t cmd[sz]; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ flags); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 0000000000..0efe29113e
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
+ uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+ void *port, struct rte_event ev[], uint16_t nb_events) \
+ { \
+ uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct cn9k_sso_hws *ws = port; \
+ \
+ RTE_SET_USED(nb_events); \
+ return cn9k_sso_hws_event_tx( \
+ ws->base, &ev[0], cmd, \
+ (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+ ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F); \
+ }
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae162..7891b749df 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -79,21 +79,20 @@ static __rte_always_inline void
cnxk_sso_hws_head_wait(uintptr_t tag_op)
{
#ifdef RTE_ARCH_ARM64
- uint64_t swtp;
-
- asm volatile(PLT_CPU_FEATURE_PREAMBLE
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbz %[swtb], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[swtb], [%[swtp_loc]] \n"
- " tbnz %[swtb], 35, rty%= \n"
- "done%=: \n"
- : [swtb] "=&r"(swtp)
- : [swtp_loc] "r"(tag_op));
+ uint64_t tag;
+
+ asm volatile(" ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(35))
+ /* Wait for the HEAD to be set */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8e..13e0634e86 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@ sources = files(
'cn9k_worker_dual_deq.c',
'cn9k_worker_dual_deq_burst.c',
'cn9k_worker_dual_deq_tmo.c',
+ 'cn9k_worker_tx_enq.c',
+ 'cn9k_worker_tx_enq_seg.c',
+ 'cn9k_worker_dual_tx_enq.c',
+ 'cn9k_worker_dual_tx_enq_seg.c',
'cn10k_eventdev.c',
'cn10k_worker.c',
'cn10k_worker_deq.c',
'cn10k_worker_deq_burst.c',
'cn10k_worker_deq_tmo.c',
+ 'cn10k_worker_tx_enq.c',
+ 'cn10k_worker_tx_enq_seg.c',
'cnxk_eventdev.c',
'cnxk_eventdev_adptr.c',
'cnxk_eventdev_selftest.c',
--
2.17.1
* [dpdk-dev] [PATCH v9 5/7] event/cnxk: add Rx adapter vector support
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (2 preceding siblings ...)
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 4/7] event/cnxk: add Tx adapter fastpath ops pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 7/7] event/cnxk: add Tx " pbhagavatula
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add event vector support to the cnxk event Rx adapter: add control
path APIs to get the vector limits and the ability to configure event
vectorization on a given Rx queue.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 106 ++++++++++++++++++++++-
drivers/event/cnxk/cnxk_eventdev.h | 2 +
drivers/event/cnxk/cnxk_eventdev_adptr.c | 25 ++++++
drivers/net/cnxk/cnxk_ethdev.h | 2 +-
4 files changed, 133 insertions(+), 2 deletions(-)
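
A minimal application-side sketch of how these control-path hooks are
meant to be exercised is below. It is illustrative only: the public
wrapper names used here (rte_event_eth_rx_adapter_vector_limits_get(),
rte_event_eth_rx_adapter_queue_event_vector_config(),
rte_event_vector_pool_create()) and the device/port/queue ids are
assumptions, while the limits/config structure fields match the ones
used in the diff below.

#include <errno.h>
#include <string.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

/* Hypothetical ids, for illustration only. */
#define EVDEV_ID    0
#define RX_ADPTR_ID 0
#define ETH_PORT    0
#define RX_QUEUE    0

static int
setup_rx_event_vectorization(void)
{
	struct rte_event_eth_rx_adapter_event_vector_config cfg;
	struct rte_event_eth_rx_adapter_vector_limits limits;
	struct rte_mempool *vec_pool;
	int rc;

	/* Query the PMD limits; for this driver the sizes are power of
	 * two (limits.log2_sz) bounded by the VWQE min/max size.
	 */
	rc = rte_event_eth_rx_adapter_vector_limits_get(EVDEV_ID, ETH_PORT,
							&limits);
	if (rc)
		return rc;

	/* Pool of rte_event_vector objects, each able to hold up to
	 * max_sz mbuf pointers.
	 */
	vec_pool = rte_event_vector_pool_create("rx_vec_pool", 16384, 0,
						limits.max_sz,
						rte_socket_id());
	if (vec_pool == NULL)
		return -ENOMEM;

	memset(&cfg, 0, sizeof(cfg));
	cfg.vector_mp = vec_pool;
	cfg.vector_sz = limits.max_sz;
	cfg.vector_timeout_ns = limits.min_timeout_ns;

	/* Enable event vectorization on one Rx queue (-1 would apply
	 * the config to all queues of the port).
	 */
	return rte_event_eth_rx_adapter_queue_event_vector_config(
		RX_ADPTR_ID, ETH_PORT, RX_QUEUE, &cfg);
}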
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c5..e85fa4785d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
- RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+ RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
}
+static int
+cn10k_sso_rx_adapter_vector_limits(
+ const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ int ret;
+
+ RTE_SET_USED(dev);
+ ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (ret)
+ return -ENOTSUP;
+
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ limits->log2_sz = true;
+ limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+ limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+ limits->min_timeout_ns =
+ (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+ limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+ return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+ uint16_t port_id, uint16_t rq_id, uint16_t sz,
+ uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+ struct roc_nix_rq *rq;
+
+ rq = &cnxk_eth_dev->rqs[rq_id];
+
+ if (!rq->sso_ena)
+ return -EINVAL;
+ if (rq->flow_tag_width == 0)
+ return -EINVAL;
+
+ rq->vwqe_ena = 1;
+ rq->vwqe_first_skip = 0;
+ rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+ rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+ rq->vwqe_wait_tmo =
+ tmo_ns /
+ ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+ rq->tag_mask = (port_id & 0xF) << 20;
+ rq->tag_mask |=
+ (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+ << 24;
+
+ return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+ const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+ struct cnxk_eth_dev *cnxk_eth_dev;
+ struct cnxk_sso_evdev *dev;
+ int i, rc;
+
+ rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+ if (rc)
+ return -EINVAL;
+
+ dev = cnxk_sso_pmd_priv(event_dev);
+ cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+ if (rx_queue_id < 0) {
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, i,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+ } else {
+
+ cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+ rc = cnxk_sso_xae_reconfigure(
+ (struct rte_eventdev *)(uintptr_t)event_dev);
+ rc = cnxk_sso_rx_adapter_vwqe_enable(
+ cnxk_eth_dev, eth_dev->data->port_id, rx_queue_id,
+ config->vector_sz, config->vector_timeout_ns,
+ config->vector_mp);
+ if (rc)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int
cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
const struct rte_eth_dev *eth_dev, uint32_t *caps)
@@ -739,6 +839,10 @@ static struct rte_eventdev_ops cn10k_sso_dev_ops = {
.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+ .eth_rx_adapter_vector_limits_get = cn10k_sso_rx_adapter_vector_limits,
+ .eth_rx_adapter_event_vector_config =
+ cn10k_sso_rx_adapter_vector_config,
+
.eth_tx_adapter_caps_get = cn10k_sso_tx_adapter_caps_get,
.eth_tx_adapter_queue_add = cn10k_sso_tx_adapter_queue_add,
.eth_tx_adapter_queue_del = cn10k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 24e1be6a97..fc49b88d6f 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -97,6 +97,8 @@ struct cnxk_sso_evdev {
uint16_t tim_adptr_ring_cnt;
uint16_t *timer_adptr_rings;
uint64_t *timer_adptr_sz;
+ uint16_t vec_pool_cnt;
+ uint64_t *vec_pools;
/* Dev args */
uint32_t xae_cnt;
uint8_t qos_queue_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 502da272d8..baf2f2aa6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -38,6 +38,31 @@ cnxk_sso_updt_xae_cnt(struct cnxk_sso_evdev *dev, void *data,
dev->adptr_xae_cnt += rxq->qconf.mp->size;
break;
}
+ case RTE_EVENT_TYPE_ETHDEV_VECTOR: {
+ struct rte_mempool *mp = data;
+ uint64_t *old_ptr;
+
+ for (i = 0; i < dev->vec_pool_cnt; i++) {
+ if ((uint64_t)mp == dev->vec_pools[i])
+ return;
+ }
+
+ dev->vec_pool_cnt++;
+ old_ptr = dev->vec_pools;
+ dev->vec_pools =
+ rte_realloc(dev->vec_pools,
+ sizeof(uint64_t) * dev->vec_pool_cnt, 0);
+ if (dev->vec_pools == NULL) {
+ dev->adptr_xae_cnt += mp->size;
+ dev->vec_pools = old_ptr;
+ dev->vec_pool_cnt--;
+ return;
+ }
+ dev->vec_pools[dev->vec_pool_cnt - 1] = (uint64_t)mp;
+
+ dev->adptr_xae_cnt += mp->size;
+ break;
+ }
case RTE_EVENT_TYPE_TIMER: {
struct cnxk_tim_ring *timr = data;
uint16_t *old_ring_ptr;
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 4eead03905..2528b3cdaa 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -238,7 +238,7 @@ struct cnxk_eth_txq_sp {
} __plt_cache_aligned;
static inline struct cnxk_eth_dev *
-cnxk_eth_pmd_priv(struct rte_eth_dev *eth_dev)
+cnxk_eth_pmd_priv(const struct rte_eth_dev *eth_dev)
{
return eth_dev->data->dev_private;
}
--
2.17.1
* [dpdk-dev] [PATCH v9 6/7] event/cnxk: add Rx event vector fastpath
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (3 preceding siblings ...)
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 5/7] event/cnxk: add Rx adapter vector support pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 7/7] event/cnxk: add Tx " pbhagavatula
5 siblings, 0 replies; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Rx event vector fastpath to convert HW-defined metadata into
rte_mbuf and rte_event_vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_worker.h | 56 ++++++++
drivers/net/cnxk/cn10k_rx.h | 200 ++++++++++++++++-----------
drivers/net/cnxk/cn10k_rx_vec.c | 2 +-
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 5 +-
4 files changed, 178 insertions(+), 85 deletions(-)
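
For reference, a minimal worker-loop sketch that consumes the
resulting RTE_EVENT_TYPE_ETHDEV_VECTOR events is below. The
dev_id/port_id values and the process_pkt() helper are hypothetical;
the vec->nb_elem/vec->mbufs[] accesses and the mempool put mirror what
this fastpath produces.

#include <stdbool.h>

#include <rte_eventdev.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* App-defined packet handler; hypothetical. */
extern void process_pkt(struct rte_mbuf *m);

static void
rx_vector_worker(uint8_t dev_id, uint8_t port_id, volatile bool *done)
{
	struct rte_event ev;
	uint16_t i;

	while (!*done) {
		if (!rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0))
			continue;

		if (ev.event_type == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
			struct rte_event_vector *vec = ev.vec;

			/* The fastpath has already turned each WQE into
			 * an rte_mbuf, so the vector can be walked
			 * directly.
			 */
			for (i = 0; i < vec->nb_elem; i++)
				process_pkt(vec->mbufs[i]);

			/* The vector object came from the adapter's
			 * event vector mempool.
			 */
			rte_mempool_put(rte_mempool_from_obj(vec), vec);
		} else if (ev.event_type == RTE_EVENT_TYPE_ETHDEV) {
			process_pkt(ev.mbuf);
		}
	}
}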
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 3c90c85009..7a48a6b17d 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
#ifndef __CN10K_WORKER_H__
#define __CN10K_WORKER_H__
+#include <rte_vect.h>
+
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
mbuf_init | ((uint64_t)port_id) << 48, flags);
}
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+ void *lookup_mem, void *tstamp)
+{
+ uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
+ (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+ struct rte_event_vector *vec;
+ uint16_t nb_mbufs, non_vec;
+ uint64_t **wqe;
+
+ mbuf_init |= ((uint64_t)port_id) << 48;
+ vec = (struct rte_event_vector *)vwqe;
+ wqe = vec->u64s;
+
+ nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+ nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+ wqe += nb_mbufs;
+ non_vec = vec->nb_elem - nb_mbufs;
+
+ while (non_vec) {
+ struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+ struct rte_mbuf *mbuf;
+ uint64_t tstamp_ptr;
+
+ mbuf = (struct rte_mbuf *)((char *)cqe -
+ sizeof(struct rte_mbuf));
+ cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+		/* Extract tstamp, if PTP enabled */
+ tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+ CNXK_SSO_WQE_SG_PTR);
+ cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+ flags & NIX_RX_OFFLOAD_TSTAMP_F,
+ flags & NIX_RX_MULTI_SEG_F,
+ (uint64_t *)tstamp_ptr);
+ wqe[0] = (uint64_t *)mbuf;
+ non_vec--;
+ wqe++;
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+ } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+ RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+ uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+ __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+ vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+ ((vwqe_hdr & 0xFFFF) << 48) |
+ ((uint64_t)port << 32);
+ *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+ cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+ ws->tstamp);
}
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e7..4c5288b2cc 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_RX_VWQE_F BIT(14)
#define NIX_RX_MULTI_SEG_F BIT(15)
#define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
#define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
#define CQE_SZ(x) ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+#define CQE_PTR_OFF(b, i, o, f) \
+ (((f) & NIX_RX_VWQE_F) ? \
+ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
+ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+
union mbuf_initializer {
struct {
uint16_t data_off;
@@ -317,61 +323,87 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
}
static __rte_always_inline uint16_t
-cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
+ const uint16_t flags, void *lookup_mem,
+ struct cnxk_timesync_info *tstamp)
{
- struct cn10k_eth_rxq *rxq = rx_queue;
- uint16_t packets = 0;
+ struct cn10k_eth_rxq *rxq = args;
+ const uint64_t mbuf_initializer = (flags & NIX_RX_VWQE_F) ?
+ *(uint64_t *)args :
+ rxq->mbuf_initializer;
+ const uint64x2_t data_off = flags & NIX_RX_VWQE_F ?
+ vdupq_n_u64(0x80ULL) :
+ vdupq_n_u64(rxq->data_off);
+ const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+ const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+ const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
- const uint64_t mbuf_initializer = rxq->mbuf_initializer;
- const uint64x2_t data_off = vdupq_n_u64(rxq->data_off);
uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
- const uint16_t *lookup_mem = rxq->lookup_mem;
- const uint32_t qmask = rxq->qmask;
- const uint64_t wdata = rxq->wdata;
- const uintptr_t desc = rxq->desc;
uint8x16_t f0, f1, f2, f3;
- uint32_t head = rxq->head;
+ uint16_t packets = 0;
uint16_t pkts_left;
-
- pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
- pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
-
- /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ uint32_t head;
+ uintptr_t cq0;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ lookup_mem = rxq->lookup_mem;
+ head = rxq->head;
+
+ pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+ pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+		/* Packets have to be floor-aligned to NIX_DESCS_PER_LOOP */
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+ tstamp = rxq->tstamp;
+ } else {
+ RTE_SET_USED(head);
+ }
while (packets < pkts) {
- /* Exit loop if head is about to wrap and become unaligned */
- if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
- NIX_DESCS_PER_LOOP) {
- pkts_left += (pkts - packets);
- break;
- }
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Exit loop if head is about to wrap and become
+ * unaligned.
+ */
+ if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
+ NIX_DESCS_PER_LOOP) {
+ pkts_left += (pkts - packets);
+ break;
+ }
- const uintptr_t cq0 = desc + CQE_SZ(head);
+ cq0 = desc + CQE_SZ(head);
+ } else {
+ cq0 = (uintptr_t)&mbufs[packets];
+ }
/* Prefetch N desc ahead */
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(8)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(9)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(10)));
- rte_prefetch_non_temporal((void *)(cq0 + CQE_SZ(11)));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 8, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 9, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 10, 0, flags));
+ rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 11, 0, flags));
/* Get NIX_RX_SG_S for size and buffer pointer */
- cq0_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(0) + 64));
- cq1_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(1) + 64));
- cq2_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(2) + 64));
- cq3_w8 = vld1q_u64((uint64_t *)(cq0 + CQE_SZ(3) + 64));
-
- /* Extract mbuf from NIX_RX_SG_S */
- mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
- mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
- mbuf01 = vqsubq_u64(mbuf01, data_off);
- mbuf23 = vqsubq_u64(mbuf23, data_off);
+ cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+ cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+ cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+ cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Extract mbuf from NIX_RX_SG_S */
+ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+ mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+ mbuf01 = vqsubq_u64(mbuf01, data_off);
+ mbuf23 = vqsubq_u64(mbuf23, data_off);
+ } else {
+ mbuf01 =
+ vsubq_u64(vld1q_u64((uint64_t *)cq0), data_off);
+ mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+ data_off);
+ }
/* Move mbufs to scalar registers for future use */
mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
@@ -395,14 +427,14 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
/* Load CQE word0 and word 1 */
- uint64_t cq0_w0 = ((uint64_t *)(cq0 + CQE_SZ(0)))[0];
- uint64_t cq0_w1 = ((uint64_t *)(cq0 + CQE_SZ(0)))[1];
- uint64_t cq1_w0 = ((uint64_t *)(cq0 + CQE_SZ(1)))[0];
- uint64_t cq1_w1 = ((uint64_t *)(cq0 + CQE_SZ(1)))[1];
- uint64_t cq2_w0 = ((uint64_t *)(cq0 + CQE_SZ(2)))[0];
- uint64_t cq2_w1 = ((uint64_t *)(cq0 + CQE_SZ(2)))[1];
- uint64_t cq3_w0 = ((uint64_t *)(cq0 + CQE_SZ(3)))[0];
- uint64_t cq3_w1 = ((uint64_t *)(cq0 + CQE_SZ(3)))[1];
+ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+ const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags);
+ const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+ const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags);
+ const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+ const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags);
+ const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+ const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags);
if (flags & NIX_RX_OFFLOAD_RSS_F) {
/* Fill rss in the rx_descriptor_fields1 */
@@ -459,17 +491,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
ol_flags0 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(0) + 38), ol_flags0,
- mbuf0);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+ ol_flags0, mbuf0);
ol_flags1 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(1) + 38), ol_flags1,
- mbuf1);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+ ol_flags1, mbuf1);
ol_flags2 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(2) + 38), ol_flags2,
- mbuf2);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+ ol_flags2, mbuf2);
ol_flags3 = nix_update_match_id(
- *(uint16_t *)(cq0 + CQE_SZ(3) + 38), ol_flags3,
- mbuf3);
+ *(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+ ol_flags3, mbuf3);
}
if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
@@ -488,7 +520,7 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
RTE_PTYPE_L2_ETHER_TIMESYNC};
const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
PKT_RX_IEEE1588_TMST |
- rxq->tstamp->rx_tstamp_dynflag;
+ tstamp->rx_tstamp_dynflag;
const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
uint64x2_t ts01, ts23, mask;
uint64_t ts[4];
@@ -526,14 +558,10 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
ts[3] = vgetq_lane_u64(ts23, 1);
/* Store timestamp into dynfield. */
- *cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
- ts[0];
- *cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
- ts[1];
- *cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
- ts[2];
- *cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
- ts[3];
+ *cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+ *cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+ *cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+ *cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
/* Generate ptype mask to filter L2 ether timesync */
mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
@@ -559,9 +587,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Update Rxq timestamp with the latest
* timestamp.
*/
- rxq->tstamp->rx_ready = 1;
- rxq->tstamp->rx_tstamp =
- ts[31 - __builtin_clz(res)];
+ tstamp->rx_ready = 1;
+ tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
}
}
@@ -584,25 +611,25 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
/* Store the mbufs to rx_pkts */
- vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
- vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+ vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
if (flags & NIX_RX_MULTI_SEG_F) {
/* Multi segment is enable build mseg list for
* individual mbufs in scalar mode.
*/
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(0) + 8), mbuf0,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 0, 8, flags)),
+ mbuf0, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(1) + 8), mbuf1,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 1, 8, flags)),
+ mbuf1, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(2) + 8), mbuf2,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 2, 8, flags)),
+ mbuf2, mbuf_initializer, flags);
nix_cqe_xtract_mseg((union nix_rx_parse_u *)
- (cq0 + CQE_SZ(3) + 8), mbuf3,
- mbuf_initializer, flags);
+ (CQE_PTR_OFF(cq0, 3, 8, flags)),
+ mbuf3, mbuf_initializer, flags);
} else {
/* Update that no more segments */
mbuf0->next = NULL;
@@ -623,12 +650,18 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
__mempool_check_cookies(mbuf2->pool, (void **)&mbuf2, 1, 1);
__mempool_check_cookies(mbuf3->pool, (void **)&mbuf3, 1, 1);
- /* Advance head pointer and packets */
- head += NIX_DESCS_PER_LOOP;
- head &= qmask;
packets += NIX_DESCS_PER_LOOP;
+
+ if (!(flags & NIX_RX_VWQE_F)) {
+ /* Advance head pointer and packets */
+ head += NIX_DESCS_PER_LOOP;
+ head &= qmask;
+ }
}
+ if (flags & NIX_RX_VWQE_F)
+ return packets;
+
rxq->head = head;
rxq->available -= packets;
@@ -637,8 +670,8 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
plt_write64((rxq->wdata | packets), rxq->cq_door);
if (unlikely(pkts_left))
- packets += cn10k_nix_recv_pkts(rx_queue, &rx_pkts[packets],
- pkts_left, flags);
+ packets += cn10k_nix_recv_pkts(args, &mbufs[packets], pkts_left,
+ flags);
return packets;
}
@@ -647,12 +680,15 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline uint16_t
cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t pkts, const uint16_t flags)
+ uint16_t pkts, const uint16_t flags,
+ void *lookup_mem, void *tstamp)
{
+ RTE_SET_USED(lookup_mem);
RTE_SET_USED(rx_queue);
RTE_SET_USED(rx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(flags);
+ RTE_SET_USED(tstamp);
return 0;
}
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 93528a44f9..166735ad59 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -12,7 +12,7 @@
uint16_t pkts) \
{ \
return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags)); \
+ (flags), NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
index 04d1e46c82..1f44dddddd 100644
--- a/drivers/net/cnxk/cn10k_rx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -9,8 +9,9 @@
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
{ \
- return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
- (flags) | NIX_RX_MULTI_SEG_F); \
+ return cn10k_nix_recv_pkts_vector( \
+ rx_queue, rx_pkts, pkts, (flags) | NIX_RX_MULTI_SEG_F, \
+ NULL, NULL); \
}
NIX_RX_FASTPATH_MODES
--
2.17.1
* [dpdk-dev] [PATCH v9 7/7] event/cnxk: add Tx event vector fastpath
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 1/7] event/cnxk: add Rx adapter support pbhagavatula
` (4 preceding siblings ...)
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 6/7] event/cnxk: add Rx event vector fastpath pbhagavatula
@ 2021-07-14 9:02 ` pbhagavatula
2021-07-16 12:19 ` Jerin Jacob
5 siblings, 1 reply; 93+ messages in thread
From: pbhagavatula @ 2021-07-14 9:02 UTC (permalink / raw)
To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, Shijith Thotton
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add Tx event vector fastpath and integrate the event vector Tx
routine into the Tx burst path.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/common/cnxk/roc_sso.h | 23 ++++++
drivers/event/cnxk/cn10k_eventdev.c | 3 +-
drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++--
drivers/event/cnxk/cn9k_worker.h | 4 +-
drivers/event/cnxk/cnxk_worker.h | 22 ------
drivers/net/cnxk/cn10k_tx.c | 2 +-
drivers/net/cnxk/cn10k_tx.h | 52 +++++++++-----
drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
10 files changed, 165 insertions(+), 52 deletions(-)
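
For reference, a hedged application-side sketch of handing a complete
vector to the Tx adapter is below; dev_id, port_id, eth_port and
tx_queue are assumptions. Setting attr_valid plus port/queue in the
vector header selects the single-destination path (the meta & BIT(31)
branch in cn10k_sso_hws_event_tx() below); without it the driver falls
back to the per-mbuf split path that reads
rte_event_eth_tx_adapter_txq_get().

#include <rte_event_eth_tx_adapter.h>
#include <rte_eventdev.h>

/* Hand 'nb' mbufs already stored in 'vec' to the Tx adapter; all of
 * them go out of the same ethdev port/Tx queue, so the destination is
 * encoded once in the vector header rather than per mbuf.
 */
static void
tx_vector_submit(uint8_t dev_id, uint8_t port_id,
		 struct rte_event_vector *vec, uint16_t nb,
		 uint16_t eth_port, uint16_t tx_queue)
{
	struct rte_event ev = {0};

	vec->nb_elem = nb;
	vec->attr_valid = 1;	/* drives the meta & BIT(31) branch */
	vec->port = eth_port;
	vec->queue = tx_queue;

	ev.event_type = RTE_EVENT_TYPE_VECTOR;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.vec = vec;

	/* Internal-port capable PMD: the enqueue goes straight to the
	 * NIX Tx queue; retry handling is omitted in this sketch.
	 */
	rte_event_eth_tx_adapter_enqueue(dev_id, port_id, &ev, 1, 0);
}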
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8a..b28f6089cc 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
+static __plt_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+ uint64_t tag;
+
+ asm volatile(PLT_CPU_FEATURE_PREAMBLE
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbnz %[tag], 35, done%= \n"
+ " sevl \n"
+ "rty%=: wfe \n"
+ " ldr %[tag], [%[tag_op]] \n"
+ " tbz %[tag], 35, rty%= \n"
+ "done%=: \n"
+ : [tag] "=&r"(tag)
+ : [tag_op] "r"(tag_op));
+#else
+ /* Wait for the SWTAG/SWTAG_FULL operation */
+ while (!(plt_read64(tag_op) & BIT_ULL(35)))
+ ;
+#endif
+}
+
/* SSO device initialization */
int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785d..6f37c5bd23 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
if (ret)
*caps = 0;
else
- *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+ *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+ RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
return 0;
}
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17d..9cc0992063 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
NIX_RX_FASTPATH_MODES
#undef R
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
{
- return (const struct cn10k_eth_txq *)
+ return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+ uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+ uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint16_t port[4], queue[4];
+ struct cn10k_eth_txq *txq;
+ uint16_t i, j;
+ uintptr_t pa;
+
+ for (i = 0; i < nb_mbufs; i += 4) {
+ port[0] = mbufs[i]->port;
+ port[1] = mbufs[i + 1]->port;
+ port[2] = mbufs[i + 2]->port;
+ port[3] = mbufs[i + 3]->port;
+
+ queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+ queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
+ queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
+ queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
+
+ if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
+ ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
+
+ for (j = 0; j < 4; j++) {
+ struct rte_mbuf *m = mbufs[i + j];
+
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[j]][queue[j]];
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
+ txq->lso_tun_fmt);
+ if (flags & NIX_TX_MULTI_SEG_F) {
+ const uint16_t segdw =
+ cn10k_nix_prepare_mseg(
+ m, (uint64_t *)lmt_addr,
+ flags);
+ pa = txq->io_addr | ((segdw - 1) << 4);
+ } else {
+ pa = txq->io_addr |
+ (cn10k_nix_tx_ext_subs(flags) + 1)
+ << 4;
+ }
+ if (!sched_type)
+ roc_sso_hws_head_wait(base +
+ SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+ }
+ } else {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[port[0]][queue[0]];
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
+ + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ }
+ }
+}
+
static __rte_always_inline uint16_t
cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- const struct cn10k_eth_txq *txq;
- struct rte_mbuf *m = ev->mbuf;
- uint16_t ref_cnt = m->refcnt;
+ struct cn10k_eth_txq *txq;
+ struct rte_mbuf *m;
uintptr_t lmt_addr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
uintptr_t pa;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+ if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+ struct rte_mbuf **mbufs = ev->vec->mbufs;
+ uint64_t meta = *(uint64_t *)ev->vec;
+
+ if (meta & BIT(31)) {
+ txq = (struct cn10k_eth_txq *)
+ txq_data[meta >> 32][meta >> 48];
+
+ cn10k_nix_xmit_pkts_vector(
+ txq, mbufs, meta & 0xFFFF, cmd,
+ ws->tx_base + SSOW_LF_GWS_TAG,
+ flags | NIX_TX_VWQE_F);
+ } else {
+ cn10k_sso_vwqe_split_tx(
+ mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+ ev->sched_type, ws->tx_base, txq_data, flags);
+ }
+ rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+ return (meta & 0xFFFF);
+ }
+
+ m = ev->mbuf;
+ ref_cnt = m->refcnt;
txq = cn10k_sso_hws_xtract_meta(m, txq_data);
cn10k_nix_tx_skeleton(txq, cmd, flags);
/* Perform header writes before barrier for TSO */
@@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
}
if (!ev->sched_type)
- cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
roc_lmt_submit_steorl(lmt_id, pa);
@@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 3f9751211a..cc1e141957 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
} else {
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 7891b749df..9f9ceab8a1 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
#endif
}
-static __rte_always_inline void
-cnxk_sso_hws_head_wait(uintptr_t tag_op)
-{
-#ifdef RTE_ARCH_ARM64
- uint64_t tag;
-
- asm volatile(" ldr %[tag], [%[tag_op]] \n"
- " tbnz %[tag], 35, done%= \n"
- " sevl \n"
- "rty%=: wfe \n"
- " ldr %[tag], [%[tag_op]] \n"
- " tbz %[tag], 35, rty%= \n"
- "done%=: \n"
- : [tag] "=&r"(tag)
- : [tag_op] "r"(tag_op));
-#else
- /* Wait for the HEAD to be set */
- while (!(plt_read64(tag_op) & BIT_ULL(35)))
- ;
-#endif
-}
-
#endif
diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 1f30bab59a..0e1276c60b 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
- flags); \
+ 0, flags); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index eb148b8e77..f75cae07ae 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -18,6 +18,7 @@
* Defining it from backwards to denote its been
* not used as offload flags to pick function
*/
+#define NIX_TX_VWQE_F BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1 \
@@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
- uint64_t *cmd, const uint16_t flags)
+ uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
const rte_iova_t io_addr = txq->io_addr;
@@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
uint64_t lso_tun_fmt;
uint64_t data;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ }
/* Get cmd skeleton */
cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
-
if (flags & NIX_TX_OFFLOAD_TSO_F)
lso_tun_fmt = txq->lso_tun_fmt;
@@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (burst > 16) {
data = cn10k_nix_tx_steor_data(flags);
@@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
struct cn10k_eth_txq *txq = tx_queue;
uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
@@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
shft += 3;
}
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
data0 = (uint64_t)data128;
data1 = (uint64_t)(data128 >> 64);
/* Make data0 similar to data1 */
@@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t data[2];
} wd;
- NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
- scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
- pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ if (!(flags & NIX_TX_VWQE_F)) {
+ NIX_XMIT_FC_OR_RETURN(txq, pkts);
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ /* Reduce the cached count */
+ txq->fc_cache_pkts -= pkts;
+ } else {
+ scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+ }
- /* Reduce the cached count */
- txq->fc_cache_pkts -= pkts;
/* Perform header writes before barrier for TSO */
if (flags & NIX_TX_OFFLOAD_TSO_F) {
for (i = 0; i < pkts; i++)
@@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_MULTI_SEG_F)
wd.data[0] >>= 16;
+ if (flags & NIX_TX_VWQE_F)
+ roc_sso_hws_head_wait(base);
+
/* Trigger LMTST */
if (lnum > 16) {
if (!(flags & NIX_TX_MULTI_SEG_F))
@@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if (unlikely(scalar)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
- scalar, cmd, flags);
+ scalar, cmd, base,
+ flags);
else
pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
- cmd, flags);
+ cmd, base, flags);
}
return pkts;
@@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+ uint16_t pkts, uint64_t *cmd, uintptr_t base,
+ const uint16_t flags)
{
RTE_SET_USED(tx_queue);
RTE_SET_USED(tx_pkts);
RTE_SET_USED(pkts);
RTE_SET_USED(cmd);
RTE_SET_USED(flags);
+ RTE_SET_USED(base);
return 0;
}
#endif
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
index 33f6754722..4ea4c8a4e5 100644
--- a/drivers/net/cnxk/cn10k_tx_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_mseg.c
@@ -18,7 +18,8 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ 0, (flags) \
+ | NIX_TX_MULTI_SEG_F); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 34e3737501..a0350496ab 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -18,7 +18,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
- (flags)); \
+ 0, (flags)); \
}
NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
index 1fad81dbad..7f98f79b97 100644
--- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
@@ -16,7 +16,7 @@
!((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
return 0; \
return cn10k_nix_xmit_pkts_vector( \
- tx_queue, tx_pkts, pkts, cmd, \
+ tx_queue, tx_pkts, pkts, cmd, 0, \
(flags) | NIX_TX_MULTI_SEG_F); \
}
--
2.17.1
* Re: [dpdk-dev] [PATCH v9 7/7] event/cnxk: add Tx event vector fastpath
2021-07-14 9:02 ` [dpdk-dev] [PATCH v9 7/7] event/cnxk: add Tx " pbhagavatula
@ 2021-07-16 12:19 ` Jerin Jacob
0 siblings, 0 replies; 93+ messages in thread
From: Jerin Jacob @ 2021-07-16 12:19 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Shijith Thotton, dpdk-dev
On Wed, Jul 14, 2021 at 2:33 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add Tx event vector fastpath, integrate event vector Tx routine
> into Tx burst.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series Acked-by: Jerin Jacob <jerinj@marvell.com>
Series v9 Applied to dpdk-next-net-eventdev/for-main. Thanks
> ---
> drivers/common/cnxk/roc_sso.h | 23 ++++++
> drivers/event/cnxk/cn10k_eventdev.c | 3 +-
> drivers/event/cnxk/cn10k_worker.h | 104 +++++++++++++++++++++++++--
> drivers/event/cnxk/cn9k_worker.h | 4 +-
> drivers/event/cnxk/cnxk_worker.h | 22 ------
> drivers/net/cnxk/cn10k_tx.c | 2 +-
> drivers/net/cnxk/cn10k_tx.h | 52 +++++++++-----
> drivers/net/cnxk/cn10k_tx_mseg.c | 3 +-
> drivers/net/cnxk/cn10k_tx_vec.c | 2 +-
> drivers/net/cnxk/cn10k_tx_vec_mseg.c | 2 +-
> 10 files changed, 165 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
> index a6030e7d8a..b28f6089cc 100644
> --- a/drivers/common/cnxk/roc_sso.h
> +++ b/drivers/common/cnxk/roc_sso.h
> @@ -44,6 +44,29 @@ struct roc_sso {
> uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
> } __plt_cache_aligned;
>
> +static __plt_always_inline void
> +roc_sso_hws_head_wait(uintptr_t tag_op)
> +{
> +#ifdef RTE_ARCH_ARM64
> + uint64_t tag;
> +
> + asm volatile(PLT_CPU_FEATURE_PREAMBLE
> + " ldr %[tag], [%[tag_op]] \n"
> + " tbnz %[tag], 35, done%= \n"
> + " sevl \n"
> + "rty%=: wfe \n"
> + " ldr %[tag], [%[tag_op]] \n"
> + " tbz %[tag], 35, rty%= \n"
> + "done%=: \n"
> + : [tag] "=&r"(tag)
> + : [tag_op] "r"(tag_op));
> +#else
> + /* Wait for the SWTAG/SWTAG_FULL operation */
> + while (!(plt_read64(tag_op) & BIT_ULL(35)))
> + ;
> +#endif
> +}
> +
> /* SSO device initialization */
> int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
> int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
> diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
> index e85fa4785d..6f37c5bd23 100644
> --- a/drivers/event/cnxk/cn10k_eventdev.c
> +++ b/drivers/event/cnxk/cn10k_eventdev.c
> @@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
> if (ret)
> *caps = 0;
> else
> - *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
> + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
> + RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
>
> return 0;
> }
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index 7a48a6b17d..9cc0992063 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
> NIX_RX_FASTPATH_MODES
> #undef R
>
> -static __rte_always_inline const struct cn10k_eth_txq *
> +static __rte_always_inline struct cn10k_eth_txq *
> cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
> const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
> {
> - return (const struct cn10k_eth_txq *)
> + return (struct cn10k_eth_txq *)
> txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
> }
>
> +static __rte_always_inline void
> +cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
> + uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
> + uint8_t sched_type, uintptr_t base,
> + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> + const uint32_t flags)
> +{
> + uint16_t port[4], queue[4];
> + struct cn10k_eth_txq *txq;
> + uint16_t i, j;
> + uintptr_t pa;
> +
> + for (i = 0; i < nb_mbufs; i += 4) {
> + port[0] = mbufs[i]->port;
> + port[1] = mbufs[i + 1]->port;
> + port[2] = mbufs[i + 2]->port;
> + port[3] = mbufs[i + 3]->port;
> +
> + queue[0] = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
> + queue[1] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 1]);
> + queue[2] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 2]);
> + queue[3] = rte_event_eth_tx_adapter_txq_get(mbufs[i + 3]);
> +
> + if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
> + ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
> +
> + for (j = 0; j < 4; j++) {
> + struct rte_mbuf *m = mbufs[i + j];
> +
> + txq = (struct cn10k_eth_txq *)
> + txq_data[port[j]][queue[j]];
> + cn10k_nix_tx_skeleton(txq, cmd, flags);
> + /* Perform header writes before barrier
> + * for TSO
> + */
> + if (flags & NIX_TX_OFFLOAD_TSO_F)
> + cn10k_nix_xmit_prepare_tso(m, flags);
> +
> + cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags,
> + txq->lso_tun_fmt);
> + if (flags & NIX_TX_MULTI_SEG_F) {
> + const uint16_t segdw =
> + cn10k_nix_prepare_mseg(
> + m, (uint64_t *)lmt_addr,
> + flags);
> + pa = txq->io_addr | ((segdw - 1) << 4);
> + } else {
> + pa = txq->io_addr |
> + (cn10k_nix_tx_ext_subs(flags) + 1)
> + << 4;
> + }
> + if (!sched_type)
> + roc_sso_hws_head_wait(base +
> + SSOW_LF_GWS_TAG);
> +
> + roc_lmt_submit_steorl(lmt_id, pa);
> + }
> + } else {
> + txq = (struct cn10k_eth_txq *)
> + txq_data[port[0]][queue[0]];
> + cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
> + + SSOW_LF_GWS_TAG,
> + flags | NIX_TX_VWQE_F);
> + }
> + }
> +}
> +
> static __rte_always_inline uint16_t
> cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> uint64_t *cmd,
> const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> const uint32_t flags)
> {
> - const struct cn10k_eth_txq *txq;
> - struct rte_mbuf *m = ev->mbuf;
> - uint16_t ref_cnt = m->refcnt;
> + struct cn10k_eth_txq *txq;
> + struct rte_mbuf *m;
> uintptr_t lmt_addr;
> + uint16_t ref_cnt;
> uint16_t lmt_id;
> uintptr_t pa;
>
> lmt_addr = ws->lmt_base;
> ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
> +
> + if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
> + struct rte_mbuf **mbufs = ev->vec->mbufs;
> + uint64_t meta = *(uint64_t *)ev->vec;
> +
> + if (meta & BIT(31)) {
> + txq = (struct cn10k_eth_txq *)
> + txq_data[meta >> 32][meta >> 48];
> +
> + cn10k_nix_xmit_pkts_vector(
> + txq, mbufs, meta & 0xFFFF, cmd,
> + ws->tx_base + SSOW_LF_GWS_TAG,
> + flags | NIX_TX_VWQE_F);
> + } else {
> + cn10k_sso_vwqe_split_tx(
> + mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
> + ev->sched_type, ws->tx_base, txq_data, flags);
> + }
> + rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
> + return (meta & 0xFFFF);
> + }
> +
> + m = ev->mbuf;
> + ref_cnt = m->refcnt;
> txq = cn10k_sso_hws_xtract_meta(m, txq_data);
> cn10k_nix_tx_skeleton(txq, cmd, flags);
> /* Perform header writes before barrier for TSO */
> @@ -346,7 +437,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
> }
> if (!ev->sched_type)
> - cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
>
> roc_lmt_submit_steorl(lmt_id, pa);
>
> @@ -357,7 +448,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
>
> cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
> ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
> -
> return 1;
> }
>
> diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
> index 3f9751211a..cc1e141957 100644
> --- a/drivers/event/cnxk/cn9k_worker.h
> +++ b/drivers/event/cnxk/cn9k_worker.h
> @@ -466,7 +466,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
> @@ -478,7 +478,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> } else {
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> - cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_one(cmd, txq->lmt_addr,
> diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
> index 7891b749df..9f9ceab8a1 100644
> --- a/drivers/event/cnxk/cnxk_worker.h
> +++ b/drivers/event/cnxk/cnxk_worker.h
> @@ -75,26 +75,4 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
> #endif
> }
>
> -static __rte_always_inline void
> -cnxk_sso_hws_head_wait(uintptr_t tag_op)
> -{
> -#ifdef RTE_ARCH_ARM64
> - uint64_t tag;
> -
> - asm volatile(" ldr %[tag], [%[tag_op]] \n"
> - " tbnz %[tag], 35, done%= \n"
> - " sevl \n"
> - "rty%=: wfe \n"
> - " ldr %[tag], [%[tag_op]] \n"
> - " tbz %[tag], 35, rty%= \n"
> - "done%=: \n"
> - : [tag] "=&r"(tag)
> - : [tag_op] "r"(tag_op));
> -#else
> - /* Wait for the HEAD to be set */
> - while (!(plt_read64(tag_op) & BIT_ULL(35)))
> - ;
> -#endif
> -}
> -
> #endif
> diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
> index 1f30bab59a..0e1276c60b 100644
> --- a/drivers/net/cnxk/cn10k_tx.c
> +++ b/drivers/net/cnxk/cn10k_tx.c
> @@ -16,7 +16,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
> - flags); \
> + 0, flags); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index eb148b8e77..f75cae07ae 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -18,6 +18,7 @@
> * Defining it from backwards to denote its been
> * not used as offload flags to pick function
> */
> +#define NIX_TX_VWQE_F BIT(14)
> #define NIX_TX_MULTI_SEG_F BIT(15)
>
> #define NIX_TX_NEED_SEND_HDR_W1 \
> @@ -519,7 +520,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> - uint64_t *cmd, const uint16_t flags)
> + uint64_t *cmd, uintptr_t base, const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> const rte_iova_t io_addr = txq->io_addr;
> @@ -528,14 +529,15 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> uint64_t lso_tun_fmt;
> uint64_t data;
>
> - NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + if (!(flags & NIX_TX_VWQE_F)) {
> + NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + /* Reduce the cached count */
> + txq->fc_cache_pkts -= pkts;
> + }
>
> /* Get cmd skeleton */
> cn10k_nix_tx_skeleton(txq, cmd, flags);
>
> - /* Reduce the cached count */
> - txq->fc_cache_pkts -= pkts;
> -
> if (flags & NIX_TX_OFFLOAD_TSO_F)
> lso_tun_fmt = txq->lso_tun_fmt;
>
> @@ -558,6 +560,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
> }
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> /* Trigger LMTST */
> if (burst > 16) {
> data = cn10k_nix_tx_steor_data(flags);
> @@ -604,7 +609,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
> @@ -652,6 +658,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> shft += 3;
> }
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> data0 = (uint64_t)data128;
> data1 = (uint64_t)(data128 >> 64);
> /* Make data0 similar to data1 */
> @@ -984,7 +993,8 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
>
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
> uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> @@ -1013,13 +1023,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64_t data[2];
> } wd;
>
> - NIX_XMIT_FC_OR_RETURN(txq, pkts);
> -
> - scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + if (!(flags & NIX_TX_VWQE_F)) {
> + NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + /* Reduce the cached count */
> + txq->fc_cache_pkts -= pkts;
> + } else {
> + scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
> + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + }
>
> - /* Reduce the cached count */
> - txq->fc_cache_pkts -= pkts;
> /* Perform header writes before barrier for TSO */
> if (flags & NIX_TX_OFFLOAD_TSO_F) {
> for (i = 0; i < pkts; i++)
> @@ -1973,6 +1987,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (flags & NIX_TX_MULTI_SEG_F)
> wd.data[0] >>= 16;
>
> + if (flags & NIX_TX_VWQE_F)
> + roc_sso_hws_head_wait(base);
> +
> /* Trigger LMTST */
> if (lnum > 16) {
> if (!(flags & NIX_TX_MULTI_SEG_F))
> @@ -2029,10 +2046,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (unlikely(scalar)) {
> if (flags & NIX_TX_MULTI_SEG_F)
> pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
> - scalar, cmd, flags);
> + scalar, cmd, base,
> + flags);
> else
> pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
> - cmd, flags);
> + cmd, base, flags);
> }
>
> return pkts;
> @@ -2041,13 +2059,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> #else
> static __rte_always_inline uint16_t
> cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, uintptr_t base,
> + const uint16_t flags)
> {
> RTE_SET_USED(tx_queue);
> RTE_SET_USED(tx_pkts);
> RTE_SET_USED(pkts);
> RTE_SET_USED(cmd);
> RTE_SET_USED(flags);
> + RTE_SET_USED(base);
> return 0;
> }
> #endif
> diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
> index 33f6754722..4ea4c8a4e5 100644
> --- a/drivers/net/cnxk/cn10k_tx_mseg.c
> +++ b/drivers/net/cnxk/cn10k_tx_mseg.c
> @@ -18,7 +18,8 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
> - (flags) | NIX_TX_MULTI_SEG_F); \
> + 0, (flags) \
> + | NIX_TX_MULTI_SEG_F); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
> index 34e3737501..a0350496ab 100644
> --- a/drivers/net/cnxk/cn10k_tx_vec.c
> +++ b/drivers/net/cnxk/cn10k_tx_vec.c
> @@ -18,7 +18,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
> - (flags)); \
> + 0, (flags)); \
> }
>
> NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> index 1fad81dbad..7f98f79b97 100644
> --- a/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> @@ -16,7 +16,7 @@
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_vector( \
> - tx_queue, tx_pkts, pkts, cmd, \
> + tx_queue, tx_pkts, pkts, cmd, 0, \
> (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> --
> 2.17.1
>
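For reference, the vector branch added to cn10k_sso_hws_event_tx above reads the first 64 bits of the event vector as one packed metadata word: the low 16 bits carry the element count, bit 31 says whether a single Tx port/queue attribute is valid for the whole vector, and the upper 32 bits carry that port/queue pair. The standalone sketch below is not part of the patch; it decodes the word the same way the fast path does, assuming the rte_event_vector header layout of this release, a little-endian target, and GCC/Clang bit-field packing. The struct and field names in the sketch are illustrative only.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative mirror of the leading 64 bits of rte_event_vector:
     * element count, a valid bit for the Tx attributes, then the
     * Tx adapter port/queue pair. */
    struct vec_meta {
            uint16_t nb_elem;
            uint16_t rsvd : 15;
            uint16_t attr_valid : 1;
            uint16_t port;
            uint16_t queue;
    };

    int main(void)
    {
            struct vec_meta hdr = { .nb_elem = 32, .rsvd = 0,
                                    .attr_valid = 1, .port = 2, .queue = 5 };
            uint64_t meta;

            /* The fast path loads the whole header as a single 64-bit word. */
            memcpy(&meta, &hdr, sizeof(meta));

            uint16_t count  = meta & 0xFFFF;          /* number of mbufs        */
            int has_txq     = !!(meta & (1UL << 31)); /* attr_valid bit         */
            uint16_t port   = (meta >> 32) & 0xFFFF;  /* Tx adapter port        */
            uint16_t queue  = meta >> 48;             /* Tx adapter queue       */

            printf("count=%u has_txq=%d port=%u queue=%u\n",
                   count, has_txq, port, queue);
            return 0;
    }

When the valid bit is set, the whole burst is handed to cn10k_nix_xmit_pkts_vector against the single queue named in the word; otherwise cn10k_sso_vwqe_split_tx walks the mbufs, resolving port and queue per packet and falling back to the scalar prepare/submit sequence whenever a group of four is not uniform.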