* [PATCH 0/3] rewrite fastpath routines
@ 2023-10-11 1:50 Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (3 more replies)
0 siblings, 4 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 1:50 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.
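For reference, the new Tx path posts commands in the 32-byte layout sketched
below (based on the cnxk_ep_instr_32B definition added in patch 3;
otx_ep_instr_ih is the existing instruction-header union in otx_ep_common.h):

  struct cnxk_ep_instr_32B {
          /* Pointer where the input data is available. */
          uint64_t dptr;
          /* Instruction header: pkind, gather flag, gather size and length. */
          union otx_ep_instr_ih ih;
          /* Misc data bytes that can be passed as front data. */
          uint64_t rsvd[2];
  };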
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 263 +++++++---------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 804 insertions(+), 257 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
* [PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-11 1:50 [PATCH 0/3] rewrite fastpath routines Vamsi Attunuru
@ 2023-10-11 1:50 ` Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
` (2 subsequent siblings)
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 1:50 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.
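With the descriptor size coming from the configuration, command posting no
longer assumes a fixed 64-byte stride; a minimal sketch of the resulting copy
in post_iqcmd() (distilled from the otx_ep_rxtx.c hunk below):

  /* Copy the command at the stride given by the configured descriptor size. */
  iqptr = iq->base_addr + (iq->host_write_index * iq->desc_size);
  rte_memcpy(iqptr, iqcmd, iq->desc_size);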
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_rxtx.c | 8 +++-----
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
5 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..5b759d759b 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -484,7 +484,7 @@ otx_ep_ring_doorbell(struct otx_ep_device *otx_ep __rte_unused,
static inline int
post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
{
- uint8_t *iqptr, cmdsize;
+ uint8_t *iqptr;
/* This ensures that the read index does not wrap around to
* the same position if queue gets full before OCTEON 9 could
@@ -494,10 +494,8 @@ post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
return OTX_EP_IQ_SEND_FAILED;
/* Copy cmd into iq */
- cmdsize = 64;
- iqptr = iq->base_addr + (iq->host_write_index << 6);
-
- rte_memcpy(iqptr, iqcmd, cmdsize);
+ iqptr = iq->base_addr + (iq->host_write_index * iq->desc_size);
+ rte_memcpy(iqptr, iqcmd, iq->desc_size);
/* Increment the host write index */
iq->host_write_index =
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
* [PATCH 2/3] net/octeon_ep: clean up receive routine
2023-10-11 1:50 [PATCH 0/3] rewrite fastpath routines Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-11 1:50 ` Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 1:50 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.
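A minimal sketch of the drain sequence applied when decrementing the hardware
counter (taken from the otx_ep_check_droq_pkts() hunk below; the instruction
queue side follows the same pattern):

  /* Subtract the consumed count, then re-request the ISM update and poll
   * until the in-memory counter reflects the subtraction, so a stale
   * in-flight ISM write is not mistaken for newly arrived packets.
   */
  rte_write32(val, droq->pkts_sent_reg);
  rte_mb();

  rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
  while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
          rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
          rte_mb();
  }
  droq->pkts_sent_ism_prev = 0;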
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 162 ++++++++++++----------------
1 file changed, 68 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 5b759d759b..ea7c9a5d62 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -565,9 +572,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -750,36 +755,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -791,21 +786,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -826,32 +818,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -863,61 +848,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -941,7 +915,14 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -950,36 +931,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1004,7 +979,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1012,7 +986,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
* [PATCH 3/3] net/octeon_ep: add new fastpath routines
2023-10-11 1:50 [PATCH 0/3] rewrite fastpath routines Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-11 1:50 ` Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 1:50 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint devices and
assigns the fastpath routines based on the offload flags.
The patch also includes miscellaneous changes to improve performance
and code readability.
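A minimal sketch of how the Rx burst routine is picked per chip generation and
offload flags (see otx_ep_set_rx_func() in the otx_ep_ethdev.c hunk below; Tx
selection follows the same pattern keyed on the multi-segment offload):

  if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
          eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
          if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
                  eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
  } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
          eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
          if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
                  eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
  } else {
          eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
  }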
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 704 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..9f11a2f317
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /*dptr*/
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..749776d70c 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,4 +9,6 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 57b965ad06..e965cbaa16 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
return 0;
@@ -255,18 +299,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -656,8 +705,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index ea7c9a5d62..e7556c5fd2 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -673,85 +673,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
--
2.25.1
* [PATCH v2 0/3] rewrite fastpath routines
2023-10-11 1:50 [PATCH 0/3] rewrite fastpath routines Vamsi Attunuru
` (2 preceding siblings ...)
2023-10-11 1:50 ` [PATCH 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-11 8:36 ` Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (3 more replies)
3 siblings, 4 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 8:36 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.
v2 change:
- Fixed CI
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 12 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 263 +++++++---------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 814 insertions(+), 257 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
* [PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
@ 2023-10-11 8:36 ` Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
` (2 subsequent siblings)
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 8:36 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/meson.build | 10 ++++++++++
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_rxtx.c | 8 +++-----
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
6 files changed, 43 insertions(+), 7 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..4538c0396e 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -10,3 +10,13 @@ sources = files(
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
)
+
+if (toolchain == 'gcc' and cc.version().version_compare('>=11.0.0'))
+ error_cflags += ['-Wno-array-bounds']
+endif
+
+foreach flag: error_cflags
+ if cc.has_argument(flag)
+ c_args += flag
+ endif
+endforeach
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..5b759d759b 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -484,7 +484,7 @@ otx_ep_ring_doorbell(struct otx_ep_device *otx_ep __rte_unused,
static inline int
post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
{
- uint8_t *iqptr, cmdsize;
+ uint8_t *iqptr;
/* This ensures that the read index does not wrap around to
* the same position if queue gets full before OCTEON 9 could
@@ -494,10 +494,8 @@ post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
return OTX_EP_IQ_SEND_FAILED;
/* Copy cmd into iq */
- cmdsize = 64;
- iqptr = iq->base_addr + (iq->host_write_index << 6);
-
- rte_memcpy(iqptr, iqcmd, cmdsize);
+ iqptr = iq->base_addr + (iq->host_write_index * iq->desc_size);
+ rte_memcpy(iqptr, iqcmd, iq->desc_size);
/* Increment the host write index */
iq->host_write_index =
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v2 2/3] net/octeon_ep: clean up receive routine
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-11 8:36 ` Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 8:36 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain in-flight ISM
memory updates while decrementing the packet count register.
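For illustration only (not part of the patch), a minimal sketch of the
drain pattern introduced below, assuming the DPDK rte_io helpers from
<rte_io.h> and the OTX2_SDP_REQUEST_ISM definition this series moves into
otx_ep_rxtx.h; the helper name is hypothetical:

	/* Subtract 'val' from the HW counter, then keep requesting fresh ISM
	 * writes until the host-side copy drops below 'val', so a stale
	 * in-flight ISM update cannot resurrect the count just cleared.
	 */
	static inline void
	ism_drain_sketch(void *cnt_reg, uint32_t *ism_addr, uint32_t val)
	{
		rte_write32(val, cnt_reg);
		rte_mb();

		rte_write64(OTX2_SDP_REQUEST_ISM, cnt_reg);
		while (__atomic_load_n(ism_addr, __ATOMIC_RELAXED) >= val) {
			rte_write64(OTX2_SDP_REQUEST_ISM, cnt_reg);
			rte_mb();
		}
	}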
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 162 ++++++++++++----------------
1 file changed, 68 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 5b759d759b..ea7c9a5d62 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -565,9 +572,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -750,36 +755,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -791,21 +786,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -826,32 +818,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -863,61 +848,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -941,7 +915,14 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -950,36 +931,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1004,7 +979,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1012,7 +986,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v2 3/3] net/octeon_ep: add new fastpath routines
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-11 8:36 ` Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 8:36 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns the fastpath routines based on
the offload flags.
The patch also adds miscellaneous changes to improve performance
and code readability.
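As an illustration only (not part of the patch), a condensed sketch of the
Rx burst selection that otx_ep_set_rx_func() below implements; the wrapper
name is hypothetical, while the routines, OTX_EP_CN*XX generation macros and
RTE_ETH_RX_OFFLOAD_SCATTER flag come from this patch and <rte_ethdev.h>:

	/* Pick the Rx burst routine from the chip generation and Rx offloads. */
	static void
	rx_burst_select_sketch(struct rte_eth_dev *eth_dev, uint32_t chip_gen,
			       uint64_t rx_offloads)
	{
		int mseg = !!(rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);

		if (chip_gen == OTX_EP_CN10XX)
			eth_dev->rx_pkt_burst = mseg ? cnxk_ep_recv_pkts_mseg : cnxk_ep_recv_pkts;
		else if (chip_gen == OTX_EP_CN9XX)
			eth_dev->rx_pkt_burst = mseg ? cn9k_ep_recv_pkts_mseg : cn9k_ep_recv_pkts;
		else
			eth_dev->rx_pkt_burst = otx_ep_recv_pkts; /* cn8xx fallback */
	}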
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 704 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..9f11a2f317
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /*dptr*/
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index 4538c0396e..ef5eed6a34 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,6 +9,8 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
if (toolchain == 'gcc' and cc.version().version_compare('>=11.0.0'))
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 57b965ad06..e965cbaa16 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
return 0;
@@ -255,18 +299,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -656,8 +705,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index ea7c9a5d62..e7556c5fd2 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -673,85 +673,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v3 0/3] rewrite fastpath routines
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
` (2 preceding siblings ...)
2023-10-11 8:36 ` [PATCH v2 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-11 12:53 ` Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (3 more replies)
3 siblings, 4 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 12:53 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and supports the 32B Tx descriptor format, which improves
performance.
v2 & v3 changes:
- Fixed CI
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 255 +++++++--------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 801 insertions(+), 252 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
@ 2023-10-11 12:53 ` Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
` (2 subsequent siblings)
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 12:53 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update the input queue setup to use the descriptor size specified in the
driver configuration. The default instruction size for otx2 and cnxk
devices has been updated to 32 bytes.
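For illustration only (not part of the patch), the 32-byte instruction
layout this enables looks roughly like the cnxk_ep_instr_32B structure
added later in this series; the field comments are assumptions:

	/* Sketch of a 32B IQ entry: only the data pointer and the instruction
	 * header, padded to 32 bytes, instead of the 64B format with the
	 * extra header words. Needs <stdint.h>.
	 */
	struct instr_32b_sketch {
		uint64_t dptr;     /* IOVA of the packet data or gather list */
		uint64_t ih;       /* instruction header: length, pkind, gather bit */
		uint64_t rsvd[2];  /* optional front data, pads the entry to 32 bytes */
	};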
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v3 2/3] net/octeon_ep: clean up receive routine
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-11 12:53 ` Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 12:53 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain in-flight ISM
memory updates while decrementing the packet count register.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 162 ++++++++++++----------------
1 file changed, 68 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..4c509a419f 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +574,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +757,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -793,21 +788,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +820,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -865,61 +850,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -943,7 +917,14 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -952,36 +933,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1006,7 +981,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1014,7 +988,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v3 3/3] net/octeon_ep: add new fastpath routines
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-11 12:53 ` Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-11 12:53 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns them based on the offload flags.
The patch also adds miscellaneous changes to improve
performance and code readability.
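For clarity, a minimal sketch of the Rx burst selection this refers to
(the Tx side follows the same pattern with RTE_ETH_TX_OFFLOAD_MULTI_SEGS);
all names are the ones introduced in the otx_ep_ethdev.c hunks below:
	/* Sketch only: pick the Rx burst routine per chip generation and
	 * scatter offload; otx_ep_set_rx_func() below is the real version.
	 */
	if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
		eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
		if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
			eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
	} else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
		eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
		if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
			eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
	} else {
		eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
	}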
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 704 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..9f11a2f317
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /*dptr*/
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..749776d70c 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,4 +9,6 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 57b965ad06..e965cbaa16 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
return 0;
@@ -255,18 +299,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -656,8 +705,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 4c509a419f..c421ef0a1c 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -675,85 +675,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v4 0/3] rewrite fastpath routines
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
` (2 preceding siblings ...)
2023-10-11 12:53 ` [PATCH v3 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-12 6:23 ` Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (3 more replies)
3 siblings, 4 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-12 6:23 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and supports the 32B Tx descriptor format, which improves
performance.
v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins
v2 & v3 changes:
- Fixed CI
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 310 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 210 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 257 +++++++--------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 805 insertions(+), 252 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
@ 2023-10-12 6:23 ` Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
` (2 subsequent siblings)
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-12 6:23 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update input queue setup to consider descriptor size in driver conf.
The default instruction size for otx2 and cnxk devices has been updated
to 32 bytes.
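A minimal sketch of the per-queue setup this implies, using the cnxk
register names from the hunk below (the otx2 and otx paths use
SDP_VF_R_IN_CTL_IS_64B and OTX_EP_R_IN_CTL_IS_64B respectively):
	/* Select 32B vs 64B instruction size from the driver configuration. */
	if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
		reg_val &= ~CNXK_EP_R_IN_CTL_IS_64B;
	else
		reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
	oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
	iq->desc_size = otx_ep->conf->iq.instr_type;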
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v4 2/3] net/octeon_ep: clean up receive routine
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-12 6:23 ` Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-12 6:23 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain in-flight
ISM memory updates while decrementing the packet count register.
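The drain pattern amounts to the following (a sketch using the DROQ field
names from the diff below; the IQ side is identical with the inst_cnt
register and its ISM copy):
	/* Subtract the consumed count from the HW counter, then keep requesting
	 * ISM updates until the host copy reflects the subtraction.
	 */
	rte_write32(val, droq->pkts_sent_reg);
	rte_mb();
	rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
	while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >= val) {
		rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
		rte_mb();
	}
	droq->pkts_sent_ism_prev = 0;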
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 164 ++++++++++++----------------
1 file changed, 70 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..2654e13e18 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,15 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +575,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +758,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -793,21 +789,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +821,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -865,61 +851,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -943,7 +918,15 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -952,36 +935,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1006,7 +983,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1014,7 +990,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v4 3/3] net/octeon_ep: add new fastpath routines
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-12 6:23 ` Vamsi Attunuru
2023-10-18 3:48 ` Jerin Jacob
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
3 siblings, 1 reply; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-12 6:23 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns them based on the offload flags.
The patch also adds miscellaneous changes to improve
performance and code readability.
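As a sketch of the 32B-instruction Tx hot path added here (field names as
in cnxk_ep_tx.c below; iq->partial_ih carries the pkind programmed at
queue setup):
	/* Fill one 32B instruction: precomputed header bits OR'ed with the
	 * packet length, plus the data pointer, then advance the write index.
	 */
	iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * iq->desc_size));
	iqcmd->ih.u64 = iq->partial_ih | rte_pktmbuf_data_len(m);
	iqcmd->dptr = rte_mbuf_data_iova(m);
	write_idx = otx_ep_incr_index(write_idx, 1, iq->nb_desc);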
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_rx.c | 310 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 210 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 706 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..22bf3ce7a7
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When door bell count is written with
+ * a value greater than drop count SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..86f771ca7e
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /*dptr*/
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return no# of instructions posted successfully. */
+ return pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..749776d70c 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,4 +9,6 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 57b965ad06..e965cbaa16 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
return 0;
@@ -255,18 +299,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -656,8 +705,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 2654e13e18..f53f0578ef 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -676,85 +676,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v4 3/3] net/octeon_ep: add new fastpath routines
2023-10-12 6:23 ` [PATCH v4 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-18 3:48 ` Jerin Jacob
0 siblings, 0 replies; 26+ messages in thread
From: Jerin Jacob @ 2023-10-18 3:48 UTC (permalink / raw)
To: Vamsi Attunuru; +Cc: dev, jerinj, sthotton
On Thu, Oct 12, 2023 at 4:41 PM Vamsi Attunuru <vattunuru@marvell.com> wrote:
>
> Adds new fastpath routines for cn10k & cn9k endpoint
> devices and assigns the fastpath routines based on
> the offload flags.
>
> Patch also adds misc changes to improve performance
> and code-readability.
>
> Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Could you rebase to next-net-mrvl? There is a build issue, either due to
the latest clang or new changes in main.
[for-next-net][dpdk-next-net-mrvl] $ clang -v
clang version 16.0.6
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-pc-linux-gnu/13.2.1
Found candidate GCC installation:
/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/13.2.1
Selected GCC installation: /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/13.2.1
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
ccache clang -Idrivers/libtmp_rte_net_octeon_ep.a.p -Idrivers
-I../drivers -Idrivers/net/octeon_ep -I../drivers/net/octeon_ep
-Ilib/ethdev -I../lib/ethdev -I. -I.. -Iconfig -I../config
-Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/
include -I../lib/eal/linux/include -Ilib/eal/x86/include
-I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common
-Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log
-I../lib/log -Ilib/metrics -I../lib/metrics -Ilib/telemetry
-I../lib/telemetry -Ilib/net -I../lib/net -Ilib/mbuf -I../lib/mbuf
-Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring -Ilib/meter
-I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/linux -Ilib/pci -I../lib/pci
-Idrivers/bus/vdev -I../drivers/bus/vdev -fcolor-diagnostics
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wextra -Werror -std=c11
-O2 -g -include rte_config.h -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-
declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare
-Wstrict-prototypes -Wundef -Wwrite-strings
-Wno-address-of-packed-member -Wno-missing-field-initializers
-D_GNU_SOURCE -fPIC -march=na
tive -mrtm -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API
-DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.octeon_ep -DRTE_ANNOTATE_LOCKS
-Wthread-safety -MD -MQ
drivers/libtmp_rte_net_octeon_ep.a.p/net_octeon_ep_otx_ep_rxtx.c.o -MF
drivers/libtmp_rte_net_
octeon_ep.a.p/net_octeon_ep_otx_ep_rxtx.c.o.d -o
drivers/libtmp_rte_net_octeon_ep.a.p/net_octeon_ep_otx_ep_rxtx.c.o -c
../drivers/net/octeon_ep/otx_ep_rxtx.c
../drivers/net/octeon_ep/otx_ep_rxtx.c:448:10: error: address argument
to atomic operation must be a pointer to _Atomic type ('uint32_t *'
(aka 'unsigned int *') invalid)
while (rte_atomic_load_explicit(iq->inst_cnt_ism,
rte_memory_order_relaxed) >=
^ ~~~~~~~~~~~~~~~~
../lib/eal/include/rte_stdatomic.h:71:2: note: expanded from macro
'rte_atomic_load_explicit'
atomic_load_explicit(ptr, memorder)
^ ~~~
/usr/lib/clang/16/include/stdatomic.h:134:30: note: expanded from
macro 'atomic_load_explicit'
#define atomic_load_explicit __c11_atomic_load
^
../drivers/net/octeon_ep/otx_ep_rxtx.c:924:10: error: address argument
to atomic operation must be a pointer to _Atomic type ('uint32_t *'
(aka 'unsigned int *') invalid)
while (rte_atomic_load_explicit(droq->pkts_sent_ism,
rte_memory_order_relaxed) >=
^ ~~~~~~~~~~~~~~~~~~~
../lib/eal/include/rte_stdatomic.h:71:2: note: expanded from macro
'rte_atomic_load_explicit'
atomic_load_explicit(ptr, memorder)
^ ~~~
/usr/lib/clang/16/include/stdatomic.h:134:30: note: expanded from
macro 'atomic_load_explicit'
#define atomic_load_explicit __c11_atomic_load
^
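For reference, the error above comes from rte_atomic_load_explicit()
expanding to the C11 atomic_load_explicit() when standard atomics are
enabled, which requires the pointed-to object to be _Atomic qualified.
A minimal sketch of the kind of declaration that satisfies the check,
using DPDK's RTE_ATOMIC() wrapper from rte_stdatomic.h, is shown below;
the structure and field names are illustrative only and not necessarily
the exact fix adopted by the series.
#include <stdint.h>
#include <rte_stdatomic.h>
/* Illustrative only: the ISM mirror pointer is declared with RTE_ATOMIC()
 * so that rte_atomic_load_explicit() sees an _Atomic-qualified object.
 */
struct example_iq {
	RTE_ATOMIC(uint32_t) *inst_cnt_ism;	/* host copy updated by SDP ISM */
	uint32_t inst_cnt_ism_prev;		/* last value consumed by the driver */
};
static inline uint32_t
example_read_ism(struct example_iq *iq)
{
	return rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
}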
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v5 0/3] rewrite fastpath routines
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
` (2 preceding siblings ...)
2023-10-12 6:23 ` [PATCH v4 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-18 8:07 ` Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (3 more replies)
3 siblings, 4 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 8:07 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and supports the 32B Tx descriptor format, which improves
performance.
v5 changes:
- Series rebased
v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins
v2 & v3 changes:
- Fixed CI
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 310 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 210 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 257 +++++++--------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 805 insertions(+), 252 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
@ 2023-10-18 8:07 ` Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
` (2 subsequent siblings)
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 8:07 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update input queue setup to consider descriptor size in driver conf.
The default instruction size for otx2 and cnxk devices has been updated
to 32 bytes.
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
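The 32-byte command format that this new default selects is laid out
later in the series (struct cnxk_ep_instr_32B). To make the size change
concrete, a stand-alone sketch of that layout is given below; the type
names are stand-ins for the driver's own definitions.
#include <stdint.h>
#include <assert.h>
/* Stand-in for union otx_ep_instr_ih; only the raw 64-bit word is shown. */
union example_ih {
	uint64_t u64;
};
/* Mirrors the 32B command introduced later in the series: data pointer,
 * instruction header and two words of optional front data.
 */
struct example_instr_32B {
	uint64_t dptr;		/* pointer to the packet data */
	union example_ih ih;	/* instruction header */
	uint64_t rsvd[2];	/* optional front data */
};
static_assert(sizeof(struct example_instr_32B) == 32, "command must be 32 bytes");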
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v5 2/3] net/octeon_ep: clean up receive routine
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-18 8:07 ` Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 8:07 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain in-flight
ISM memory updates while decrementing the packet count register.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 164 ++++++++++++----------------
1 file changed, 70 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..2654e13e18 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,15 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +575,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +758,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -793,21 +789,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +821,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -865,61 +851,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -943,7 +918,15 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -952,36 +935,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1006,7 +983,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1014,7 +990,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v5 3/3] net/octeon_ep: add new fastpath routines
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-18 8:07 ` Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
3 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 8:07 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns the fastpath routines based on
the offload flags.
The patch also adds miscellaneous changes to improve performance
and code readability.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
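The Rx/Tx burst function is now picked from the standard ethdev offload
flags (see otx_ep_set_rx_func()/otx_ep_set_tx_func() in the diff below).
A hypothetical application-side snippet that would steer a cn10k port
onto the multi-segment variants could look like the following; the port,
queue sizes and mempool are placeholders, not values required by the
driver.
#include <rte_ethdev.h>
/* Hypothetical configuration: requesting scattered Rx and multi-segment Tx
 * makes the driver select cnxk_ep_recv_pkts_mseg()/cnxk_ep_xmit_pkts_mseg()
 * on cn10k; without these offloads the single-segment routines are used.
 */
static int
example_configure_port(uint16_t port_id, struct rte_mempool *mp)
{
	struct rte_eth_conf conf = {0};
	int ret;
	conf.rxmode.offloads = RTE_ETH_RX_OFFLOAD_SCATTER;
	conf.txmode.offloads = RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret < 0)
		return ret;
	ret = rte_eth_rx_queue_setup(port_id, 0, 1024,
				     rte_eth_dev_socket_id(port_id), NULL, mp);
	if (ret < 0)
		return ret;
	ret = rte_eth_tx_queue_setup(port_id, 0, 1024,
				     rte_eth_dev_socket_id(port_id), NULL);
	if (ret < 0)
		return ret;
	return rte_eth_dev_start(port_id);
}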
drivers/net/octeon_ep/cnxk_ep_rx.c | 310 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 210 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 706 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..22bf3ce7a7
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When the doorbell count is written with
+ * a value greater than drop count, SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When the doorbell count is written with
+ * a value greater than drop count, SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..86f771ca7e
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic.
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed) >=
+ val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /*dptr*/
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return the number of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return the number of instructions posted successfully. */
+ return pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..749776d70c 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,4 +9,6 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 970372bbd7..615cbbb648 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
for (q = 0; q < eth_dev->data->nb_rx_queues; q++)
@@ -266,18 +310,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -667,8 +716,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 2654e13e18..f53f0578ef 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -676,85 +676,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v6 0/3] rewrite fastpath routines
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
` (2 preceding siblings ...)
2023-10-18 8:07 ` [PATCH v5 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-18 11:14 ` Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
` (2 more replies)
3 siblings, 3 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 11:14 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and supports the 32B Tx descriptor format, which improves
performance.
V6 changes:
- Use __atomic_xxx built-ins to fix CI build
V5 changes:
- Series rebased
v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins
v2 & v3 changes:
- Fixed CI
Shijith Thotton (1):
net/octeon_ep: support 32B IQ descriptor size
Vamsi Attunuru (2):
net/octeon_ep: clean up receive routine
net/octeon_ep: add new fastpath routines
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 12 +-
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 11 +-
drivers/net/octeon_ep/otx_ep_common.h | 127 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 255 +++++++--------------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
drivers/net/octeon_ep/otx_ep_vf.c | 8 +
11 files changed, 801 insertions(+), 252 deletions(-)
create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
@ 2023-10-18 11:14 ` Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 11:14 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton
From: Shijith Thotton <sthotton@marvell.com>
Update the input queue setup to consider the descriptor size set in the
driver configuration.
The default instruction size for otx2 and cnxk devices has been updated
to 32 bytes.
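
For illustration only (editor's sketch, not part of the patch): with
OTX_EP_32BYTE_INSTR each command written to the input ring is a single
32-byte line. The struct below mirrors the cnxk_ep_instr_32B layout added
later in this series; the name and the plain uint64_t header field are
placeholders, not driver API.

#include <stdint.h>

/* Roughly what one 32B input-queue command carries; halving the command
 * size from 64B to 32B halves the host bytes written to the ring per packet.
 */
struct example_instr_32B {
	uint64_t dptr;      /* IOVA of the packet data or gather list */
	uint64_t ih;        /* instruction header: pkind, gather bit, length */
	uint64_t rsvd[2];   /* optional front data */
};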
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx2_ep_vf.c | 10 +++++++++-
drivers/net/octeon_ep/otx_ep_common.h | 4 ++++
drivers/net/octeon_ep/otx_ep_vf.c | 8 ++++++++
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+ else
+ reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq = {
.max_iqs = OTX_EP_CFG_IO_QUEUES,
- .instr_type = OTX_EP_64BYTE_INSTR,
+ .instr_type = OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
#define OTX_EP_MAX_RINGS_PER_VF (8)
#define OTX_EP_CFG_IO_QUEUES OTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
#define OTX_EP_64BYTE_INSTR (64)
/*
* Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of descriptors in this ring. */
uint32_t nb_desc;
+ /* Size of the descriptor. */
+ uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
+ /* Configure input queue instruction size. */
+ if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+ reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+ else
+ reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+ oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+ iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
OTX_EP_R_IN_INSTR_BADDR(iq_no));
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v6 2/3] net/octeon_ep: clean up receive routine
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
@ 2023-10-18 11:14 ` Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2 siblings, 0 replies; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 11:14 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.
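
For illustration only (editor's sketch, not part of the patch), the drain
pattern this change applies boils down to the following; it assumes the
rte_io/rte_atomics helpers and the OTX2_SDP_REQUEST_ISM constant used by
this driver, and the function and parameter names are placeholders.

/* Subtract 'val' packets from the HW counter, then re-request the ISM
 * write and spin until the ISM location drops below 'val', so a stale
 * in-flight ISM DMA cannot be read back later as new packets.
 */
static inline void
example_drain_ism(void *cnt_reg, uint32_t *ism_addr, uint32_t val)
{
	rte_write32(val, cnt_reg);
	rte_mb();

	rte_write64(OTX2_SDP_REQUEST_ISM, cnt_reg);
	while (__atomic_load_n(ism_addr, __ATOMIC_RELAXED) >= val) {
		rte_write64(OTX2_SDP_REQUEST_ISM, cnt_reg);
		rte_mb();
	}
}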
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/otx_ep_rxtx.c | 162 ++++++++++++----------------
1 file changed, 68 insertions(+), 94 deletions(-)
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..4c509a419f 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
* when count above halfway to saturation.
*/
rte_write32(val, iq->inst_cnt_reg);
- *iq->inst_cnt_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +574,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
- ih->s.tlen = pkt_len + ih->s.fsz;
- ih->s.gsz = frags;
- ih->s.gather = 1;
+ ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +757,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
- struct otx_ep_droq_desc *desc_ring;
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
- desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
- /* If a valid buffer exists (happens if there is no dispatch),
- * reuse the buffer, else allocate.
- */
- if (droq->recv_buf_list[droq->refill_idx] != NULL)
- break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (buf == NULL) {
+ if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
- memset(info, 0, sizeof(*info));
+ info->length = 0;
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
@@ -793,21 +788,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
}
static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
- struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
{
volatile struct otx_ep_droq_info *info;
- struct rte_mbuf *droq_pkt2 = NULL;
- struct rte_mbuf *droq_pkt = NULL;
- struct rte_net_hdr_lens hdr_lens;
- struct otx_ep_droq_info *info2;
+ struct rte_mbuf *mbuf_next = NULL;
+ struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- droq_pkt2 = droq->recv_buf_list[droq->read_idx];
- info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+ mbuf = droq->recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +820,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
- droq_pkt2 = droq->recv_buf_list[next_idx];
- info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
- rte_prefetch_non_temporal((const void *)info2);
+ mbuf_next = droq->recv_buf_list[next_idx];
+ rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
}
- info->length = rte_bswap64(info->length);
+ info->length = rte_bswap16(info->length >> 48);
/* Deduce the actual data size */
total_pkt_len = info->length + OTX_EP_INFO_SIZE;
if (total_pkt_len <= droq->buffer_size) {
- droq_pkt = droq->recv_buf_list[droq->read_idx];
- if (likely(droq_pkt != NULL)) {
- droq_pkt->data_off += OTX_EP_INFO_SIZE;
- /* otx_ep_dbg("OQ: pkt_len[%ld], buffer_size %d\n",
- * (long)info->length, droq->buffer_size);
- */
- pkt_len = (uint32_t)info->length;
- droq_pkt->pkt_len = pkt_len;
- droq_pkt->data_len = pkt_len;
- droq_pkt->port = otx_ep->port_id;
- droq->recv_buf_list[droq->read_idx] = NULL;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
- droq->refill_count++;
- }
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ pkt_len = (uint32_t)info->length;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = otx_ep->port_id;
+ droq->recv_buf_list[droq->read_idx] = NULL;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
+ droq->refill_count++;
} else {
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
@@ -865,61 +850,50 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
- cpy_len = ((pkt_len + droq->buffer_size) >
- total_pkt_len)
- ? ((uint32_t)total_pkt_len -
- pkt_len)
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len)
: droq->buffer_size;
- droq_pkt = droq->recv_buf_list[droq->read_idx];
+ mbuf = droq->recv_buf_list[droq->read_idx];
droq->recv_buf_list[droq->read_idx] = NULL;
- if (likely(droq_pkt != NULL)) {
+ if (likely(mbuf)) {
/* Note the first seg */
if (!pkt_len)
- first_buf = droq_pkt;
+ first_buf = mbuf;
- droq_pkt->port = otx_ep->port_id;
+ mbuf->port = otx_ep->port_id;
if (!pkt_len) {
- droq_pkt->data_off +=
- OTX_EP_INFO_SIZE;
- droq_pkt->pkt_len =
- cpy_len - OTX_EP_INFO_SIZE;
- droq_pkt->data_len =
- cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
} else {
- droq_pkt->pkt_len = cpy_len;
- droq_pkt->data_len = cpy_len;
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
}
if (pkt_len) {
first_buf->nb_segs++;
- first_buf->pkt_len += droq_pkt->pkt_len;
+ first_buf->pkt_len += mbuf->pkt_len;
}
if (last_buf)
- last_buf->next = droq_pkt;
+ last_buf->next = mbuf;
- last_buf = droq_pkt;
+ last_buf = mbuf;
} else {
otx_ep_err("no buf\n");
assert(0);
}
pkt_len += cpy_len;
- droq->read_idx = otx_ep_incr_index(droq->read_idx, 1,
- droq->nb_desc);
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
droq->refill_count++;
}
- droq_pkt = first_buf;
+ mbuf = first_buf;
}
- droq_pkt->packet_type = rte_net_get_ptype(droq_pkt, &hdr_lens,
- RTE_PTYPE_ALL_MASK);
- droq_pkt->l2_len = hdr_lens.l2_len;
- droq_pkt->l3_len = hdr_lens.l3_len;
- droq_pkt->l4_len = hdr_lens.l4_len;
- return droq_pkt;
+ return mbuf;
}
static inline uint32_t
@@ -943,7 +917,14 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
* when count above halfway to saturation.
*/
rte_write32(val, droq->pkts_sent_reg);
- *droq->pkts_sent_ism = 0;
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
droq->pkts_sent_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
@@ -952,36 +933,30 @@ otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
return new_pkts;
}
+static inline int32_t __rte_hot
+otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (unlikely(droq->pkts_pending < nb_pkts))
+ otx_ep_check_droq_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
/* Check for response arrival from OCTEON 9
* returns number of requests completed
*/
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget)
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct otx_ep_droq *droq = rx_queue;
struct otx_ep_device *otx_ep;
struct rte_mbuf *oq_pkt;
-
- uint32_t pkts = 0;
+ uint16_t pkts, new_pkts;
uint32_t valid_pkts = 0;
- uint32_t new_pkts = 0;
int next_fetch;
otx_ep = droq->otx_ep_dev;
-
- if (droq->pkts_pending > budget) {
- new_pkts = budget;
- } else {
- new_pkts = droq->pkts_pending;
- new_pkts += otx_ep_check_droq_pkts(droq);
- if (new_pkts > budget)
- new_pkts = budget;
- }
-
- if (!new_pkts)
- goto update_credit; /* No pkts at this moment */
+ new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
for (pkts = 0; pkts < new_pkts; pkts++) {
/* Push the received pkt to application */
@@ -1006,7 +981,6 @@ otx_ep_recv_pkts(void *rx_queue,
droq->pkts_pending -= pkts;
/* Refill DROQ buffers */
-update_credit:
if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
int desc_refilled = otx_ep_droq_refill(droq);
@@ -1014,7 +988,7 @@ otx_ep_recv_pkts(void *rx_queue,
* that when we update the credits the data in memory is
* accurate.
*/
- rte_wmb();
+ rte_io_wmb();
rte_write32(desc_refilled, droq->pkts_credit_reg);
} else {
/*
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v6 3/3] net/octeon_ep: add new fastpath routines
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
@ 2023-10-18 11:14 ` Vamsi Attunuru
2023-10-19 3:03 ` Jerin Jacob
2 siblings, 1 reply; 26+ messages in thread
From: Vamsi Attunuru @ 2023-10-18 11:14 UTC (permalink / raw)
To: dev, jerinj; +Cc: sthotton, Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns the fastpath routines based on
the offload flags.
Patch also adds misc changes to improve performance
and code readability.
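
For illustration only (editor's sketch, not part of the patch): the Rx
burst selection added in otx_ep_set_rx_func() below is equivalent to the
condensed helper here; the helper name is a placeholder, and the burst
functions, chip generation flags and offload macro come from the driver
and ethdev headers.

typedef uint16_t (*rx_burst_fn)(void *rxq, struct rte_mbuf **pkts, uint16_t nb_pkts);

static rx_burst_fn
example_pick_rx_burst(uint32_t chip_gen, uint64_t rx_offloads)
{
	int scatter = !!(rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);

	if (chip_gen == OTX_EP_CN10XX)
		return scatter ? cnxk_ep_recv_pkts_mseg : cnxk_ep_recv_pkts;
	if (chip_gen == OTX_EP_CN9XX)
		return scatter ? cn9k_ep_recv_pkts_mseg : cn9k_ep_recv_pkts;

	return otx_ep_recv_pkts; /* cn8xx keeps the existing routine */
}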
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
drivers/net/octeon_ep/cnxk_ep_rx.c | 309 ++++++++++++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_tx.c | 209 +++++++++++++++++
drivers/net/octeon_ep/cnxk_ep_vf.c | 2 +
drivers/net/octeon_ep/cnxk_ep_vf.h | 13 ++
drivers/net/octeon_ep/meson.build | 2 +
drivers/net/octeon_ep/otx2_ep_vf.c | 1 +
drivers/net/octeon_ep/otx_ep_common.h | 125 ++++++-----
drivers/net/octeon_ep/otx_ep_ethdev.c | 69 +++++-
drivers/net/octeon_ep/otx_ep_rxtx.c | 93 +-------
drivers/net/octeon_ep/otx_ep_rxtx.h | 38 +++-
10 files changed, 704 insertions(+), 157 deletions(-)
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 0000000000..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+ struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t refill_idx = droq->refill_idx;
+ struct rte_mbuf *buf;
+ uint32_t i;
+ int rc;
+
+ rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ buf = recv_buf_list[refill_idx];
+ desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+ refill_idx++;
+ }
+
+ droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+ droq->refill_count -= count;
+
+ return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+ uint32_t desc_refilled = 0, count;
+ uint32_t nb_desc = droq->nb_desc;
+ uint32_t refill_idx = droq->refill_idx;
+ int rc;
+
+ if (unlikely(droq->read_idx == refill_idx))
+ return;
+
+ if (refill_idx < droq->read_idx) {
+ count = droq->read_idx - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled = count;
+ } else {
+ count = nb_desc - refill_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+
+ desc_refilled = count;
+ count = droq->read_idx;
+ rc = cnxk_ep_rx_refill_mbuf(droq, count);
+ if (unlikely(rc)) {
+ droq->stats.rx_alloc_failure++;
+ return;
+ }
+ desc_refilled += count;
+ }
+
+ /* Flush the droq descriptor data to memory to be sure
+ * that when we update the credits the data in memory is
+ * accurate.
+ */
+ rte_io_wmb();
+ rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+ uint32_t new_pkts;
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic.
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+ new_pkts = val - droq->pkts_sent_ism_prev;
+ droq->pkts_sent_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, droq->pkts_sent_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ rte_mb();
+ }
+
+ droq->pkts_sent_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+ droq->pkts_pending += new_pkts;
+
+ return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
+{
+ if (droq->pkts_pending < nb_pkts)
+ cnxk_ep_check_rx_pkts(droq);
+
+ return RTE_MIN(nb_pkts, droq->pkts_pending);
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *mbuf;
+ uint16_t pkt_len;
+
+ mbuf = recv_buf_list[read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+ read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
+ pkt_len = rte_bswap16(info->length >> 48);
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = pkt_len;
+ mbuf->data_len = pkt_len;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+ droq->read_idx = read_idx;
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= new_pkts;
+ /* Stats */
+ droq->stats.pkts_received += new_pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
+ uint16_t new_pkts)
+{
+ struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+ uint32_t total_pkt_len, bytes_rsvd = 0;
+ uint16_t port_id = droq->otx_ep_dev->port_id;
+ uint16_t nb_desc = droq->nb_desc;
+ uint16_t pkts;
+
+ for (pkts = 0; pkts < new_pkts; pkts++) {
+ struct otx_ep_droq_info *info;
+ struct rte_mbuf *first_buf = NULL;
+ struct rte_mbuf *last_buf = NULL;
+ struct rte_mbuf *mbuf;
+ uint32_t pkt_len = 0;
+
+ mbuf = recv_buf_list[droq->read_idx];
+ info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
+ total_pkt_len = rte_bswap16(info->length >> 48) + OTX_EP_INFO_SIZE;
+
+ while (pkt_len < total_pkt_len) {
+ int cpy_len;
+
+ cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
+ ? ((uint32_t)total_pkt_len - pkt_len) : droq->buffer_size;
+
+ mbuf = droq->recv_buf_list[droq->read_idx];
+
+ if (!pkt_len) {
+ /* Note the first seg */
+ first_buf = mbuf;
+ mbuf->data_off += OTX_EP_INFO_SIZE;
+ mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
+ mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
+ } else {
+ mbuf->pkt_len = cpy_len;
+ mbuf->data_len = cpy_len;
+ first_buf->nb_segs++;
+ first_buf->pkt_len += mbuf->pkt_len;
+ }
+
+ if (last_buf)
+ last_buf->next = mbuf;
+
+ last_buf = mbuf;
+
+ pkt_len += cpy_len;
+ droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, nb_desc);
+ droq->refill_count++;
+ }
+ mbuf = first_buf;
+ mbuf->port = port_id;
+ rx_pkts[pkts] = mbuf;
+ bytes_rsvd += pkt_len;
+ }
+
+ droq->refill_count += new_pkts;
+ droq->pkts_pending -= pkts;
+ /* Stats */
+ droq->stats.pkts_received += pkts;
+ droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When the doorbell count is written with
+ * a value greater than drop count, SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
+ cnxk_ep_rx_refill(droq);
+
+ return new_pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+ uint16_t new_pkts;
+
+ new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+ cnxk_ep_process_pkts_scalar_mseg(rx_pkts, droq, new_pkts);
+
+ /* Refill RX buffers */
+ if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
+ cnxk_ep_rx_refill(droq);
+ } else {
+ /* SDP output goes into DROP state when output doorbell count
+ * goes below drop count. When the doorbell count is written with
+ * a value greater than drop count, SDP output should come out
+ * of DROP state. Due to a race condition this is not happening.
+ * Writing doorbell register with 0 again may make SDP output
+ * come out of this state.
+ */
+
+ rte_write32(0, droq->pkts_credit_reg);
+ }
+
+ return new_pkts;
+}
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c b/drivers/net/octeon_ep/cnxk_ep_tx.c
new file mode 100644
index 0000000000..9f11a2f317
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static uint32_t
+cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+{
+ uint32_t val;
+
+ /* Batch subtractions from the HW counter to reduce PCIe traffic.
+ * This adds an extra local variable, but almost halves the
+ * number of PCIe writes.
+ */
+ val = __atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED);
+ iq->inst_cnt += val - iq->inst_cnt_ism_prev;
+ iq->inst_cnt_ism_prev = val;
+
+ if (val > (uint32_t)(1 << 31)) {
+ /* Only subtract the packet count in the HW counter
+ * when count above halfway to saturation.
+ */
+ rte_write64((uint64_t)val, iq->inst_cnt_reg);
+ rte_mb();
+
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+ rte_mb();
+ }
+
+ iq->inst_cnt_ism_prev = 0;
+ }
+ rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index.
+ */
+ return iq->inst_cnt & (iq->nb_desc - 1);
+}
+
+static inline void
+cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
+{
+ uint32_t instr_processed = 0;
+ uint32_t cnt = 0;
+
+ iq->otx_read_index = cnxk_vf_update_read_index(iq);
+
+ if (unlikely(iq->flush_index == iq->otx_read_index))
+ return;
+
+ if (iq->flush_index < iq->otx_read_index) {
+ instr_processed = iq->otx_read_index - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+ } else {
+ cnt = iq->nb_desc - iq->flush_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);
+
+ instr_processed = iq->otx_read_index;
+ rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
+ iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
+
+ instr_processed += cnt;
+ }
+
+ iq->stats.instr_processed = instr_processed;
+ iq->instr_pending -= instr_processed;
+}
+
+static inline void
+set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ sg_entry->u.size[pos] = size;
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
+#endif
+}
+
+static __rte_always_inline void
+cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
+{
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len;
+ uint32_t tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ m = tx_pkts[pkts];
+ iq->mbuf_list[write_idx] = m;
+ pkt_len = rte_pktmbuf_data_len(m);
+
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->ih.u64 = iq->partial_ih | pkt_len;
+ iqcmd->dptr = rte_mbuf_data_iova(m); /* DMA address of the packet data */
+ tx_bytes += pkt_len;
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ }
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+}
+
+static __rte_always_inline uint16_t
+cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
+ uint16_t nb_pkts)
+{
+ uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
+ struct otx_ep_buf_free_info *finfo;
+ struct cnxk_ep_instr_32B *iqcmd;
+ struct rte_mbuf *m;
+ uint32_t pkt_len, tx_bytes = 0;
+ uint32_t write_idx = iq->host_write_index;
+ uint16_t pkts, nb_desc = iq->nb_desc;
+ uint8_t desc_size = iq->desc_size;
+
+ for (pkts = 0; pkts < nb_pkts; pkts++) {
+ uint16_t j = 0;
+
+ m = tx_pkts[pkts];
+ frags = m->nb_segs;
+
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
+
+ if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
+ otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments\n");
+ goto exit;
+ }
+
+ finfo = &iq->req_list[write_idx].finfo;
+
+ iq->mbuf_list[write_idx] = m;
+ iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
+ iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
+ iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;
+
+ while (frags--) {
+ finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
+ set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
+ j++;
+ m = m->next;
+ }
+
+ /* Increment the host write index */
+ write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
+ tx_bytes += pkt_len;
+ }
+exit:
+ iq->host_write_index = write_idx;
+
+ /* ring dbell */
+ rte_io_wmb();
+ rte_write64(pkts, iq->doorbell_reg);
+ iq->instr_pending += pkts;
+ iq->stats.tx_pkts += pkts;
+ iq->stats.tx_bytes += tx_bytes;
+
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return the number of instructions posted successfully. */
+ return pkts;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
+ uint16_t pkts;
+
+ pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);
+
+ pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);
+
+ if (iq->instr_pending >= OTX_EP_MAX_INSTR)
+ cnxk_ep_flush_iq(iq);
+
+ /* Return the number of instructions posted successfully. */
+ return pkts;
+}
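
For illustration only, a minimal standalone model of the ISM-based
read-index consolidation implemented in cnxk_vf_update_read_index()
above. The struct and function names below are hypothetical, it omits
the saturation handling and register writes, and it assumes the ring
size is a power of two:

#include <stdint.h>

struct iq_model {
	volatile uint32_t *inst_cnt_ism; /* shadow counter updated by HW in host memory */
	uint32_t inst_cnt_ism_prev;      /* last shadow value already folded in */
	uint32_t inst_cnt;               /* local free-running instruction counter */
	uint32_t nb_desc;                /* ring size, power of two */
};

static uint32_t
iq_model_read_index(struct iq_model *iq)
{
	/* Fold only the delta since the last poll into the local counter,
	 * so the HW register itself is rarely touched over PCIe.
	 */
	uint32_t val = *iq->inst_cnt_ism;

	iq->inst_cnt += val - iq->inst_cnt_ism_prev;
	iq->inst_cnt_ism_prev = val;

	/* Mask with the power-of-two ring size to get the read index. */
	return iq->inst_cnt & (iq->nb_desc - 1);
}
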
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 7b3669fe0c..ef275703c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -156,6 +156,8 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(void *)iq->inst_cnt_ism, ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
+
return 0;
}
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 86277449ea..41d8fbbb3a 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -6,6 +6,8 @@
#include <rte_io.h>
+#include "otx_ep_common.h"
+
#define CNXK_CONFIG_XPANSION_BAR 0x38
#define CNXK_CONFIG_PCIE_CAP 0x70
#define CNXK_CONFIG_PCIE_DEVCAP 0x74
@@ -178,6 +180,17 @@ struct cnxk_ep_instr_64B {
uint64_t exhdr[4];
};
+struct cnxk_ep_instr_32B {
+ /* Pointer where the input data is available. */
+ uint64_t dptr;
+
+ /* OTX_EP Instruction Header. */
+ union otx_ep_instr_ih ih;
+
+ /* Misc data bytes that can be passed as front data */
+ uint64_t rsvd[2];
+};
+
#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4)
#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue))
#define CNXK_EP_ISM_EN (0x1)
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..749776d70c 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -9,4 +9,6 @@ sources = files(
'otx2_ep_vf.c',
'cnxk_ep_vf.c',
'otx_ep_mbox.c',
+ 'cnxk_ep_rx.c',
+ 'cnxk_ep_tx.c',
)
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index f72b8d25d7..7f4edf8dcf 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -307,6 +307,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
(unsigned int)ism_addr);
*iq->inst_cnt_ism = 0;
iq->inst_cnt_ism_prev = 0;
+ iq->partial_ih = ((uint64_t)otx_ep->pkind) << 36;
return 0;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index 90e059cad0..82e57520d3 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,7 +4,20 @@
#ifndef _OTX_EP_COMMON_H_
#define _OTX_EP_COMMON_H_
+#include <rte_bitops.h>
#include <rte_spinlock.h>
+#include <unistd.h>
+#include <assert.h>
+#include <rte_eal.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <rte_net.h>
+#include <ethdev_pci.h>
+
+#define OTX_EP_CN8XX RTE_BIT32(0)
+#define OTX_EP_CN9XX RTE_BIT32(1)
+#define OTX_EP_CN10XX RTE_BIT32(2)
#define OTX_EP_NW_PKT_OP 0x1220
#define OTX_EP_NW_CMD_OP 0x1221
@@ -38,7 +51,7 @@
#define OTX_EP_NORESP_OHSM_SEND (4)
#define OTX_EP_NORESP_LAST (4)
#define OTX_EP_PCI_RING_ALIGN 65536
-#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_MAX_SG_LISTS 6
#define OTX_EP_NUM_SG_PTRS 4
#define SDP_PKIND 40
#define SDP_OTX2_PKIND 57
@@ -203,6 +216,38 @@ struct otx_ep_iq_config {
* such structure to represent it.
*/
struct otx_ep_instr_queue {
+ /* Location in memory updated by SDP ISM */
+ uint32_t *inst_cnt_ism;
+ struct rte_mbuf **mbuf_list;
+ /* Pointer to the Virtual Base addr of the input ring. */
+ uint8_t *base_addr;
+
+ /* track inst count locally to consolidate HW counter updates */
+ uint32_t inst_cnt_ism_prev;
+
+ /* Input ring index, where the driver should write the next packet */
+ uint32_t host_write_index;
+
+ /* Input ring index, where the OCTEON 9 should read the next packet */
+ uint32_t otx_read_index;
+ /** This index aids in finding the window in the queue where OCTEON 9
+ * has read the commands.
+ */
+ uint32_t flush_index;
+ /* This keeps track of the instructions pending in this queue. */
+ uint64_t instr_pending;
+
+ /* Memory zone */
+ const struct rte_memzone *iq_mz;
+ /* OTX_EP doorbell register for the ring. */
+ void *doorbell_reg;
+
+ /* OTX_EP instruction count register for this ring. */
+ void *inst_cnt_reg;
+
+ /* Number of instructions pending to be posted to OCTEON 9. */
+ uint32_t fill_cnt;
+
struct otx_ep_device *otx_ep_dev;
uint32_t q_no;
@@ -219,54 +264,21 @@ struct otx_ep_instr_queue {
/* Size of the descriptor. */
uint8_t desc_size;
- /* Input ring index, where the driver should write the next packet */
- uint32_t host_write_index;
-
- /* Input ring index, where the OCTEON 9 should read the next packet */
- uint32_t otx_read_index;
-
uint32_t reset_instr_cnt;
- /** This index aids in finding the window in the queue where OCTEON 9
- * has read the commands.
- */
- uint32_t flush_index;
-
/* Free-running/wrapping instruction counter for IQ. */
uint32_t inst_cnt;
- /* This keeps track of the instructions pending in this queue. */
- uint64_t instr_pending;
-
- /* Pointer to the Virtual Base addr of the input ring. */
- uint8_t *base_addr;
+ uint64_t partial_ih;
/* This IQ request list */
struct otx_ep_instr_list *req_list;
- /* OTX_EP doorbell register for the ring. */
- void *doorbell_reg;
-
- /* OTX_EP instruction count register for this ring. */
- void *inst_cnt_reg;
-
- /* Number of instructions pending to be posted to OCTEON 9. */
- uint32_t fill_cnt;
-
/* Statistics for this input queue. */
struct otx_ep_iq_stats stats;
/* DMA mapped base address of the input descriptor ring. */
uint64_t base_addr_dma;
-
- /* Memory zone */
- const struct rte_memzone *iq_mz;
-
- /* Location in memory updated by SDP ISM */
- uint32_t *inst_cnt_ism;
-
- /* track inst count locally to consolidate HW counter updates */
- uint32_t inst_cnt_ism_prev;
};
/** Descriptor format.
@@ -344,14 +356,17 @@ struct otx_ep_oq_config {
/* The Descriptor Ring Output Queue(DROQ) structure. */
struct otx_ep_droq {
- struct otx_ep_device *otx_ep_dev;
/* The 8B aligned descriptor ring starts at this address. */
struct otx_ep_droq_desc *desc_ring;
- uint32_t q_no;
- uint64_t last_pkt_count;
+ /* The 8B aligned info ptrs begin from this address. */
+ struct otx_ep_droq_info *info_list;
- struct rte_mempool *mpool;
+ /* receive buffer list contains mbuf ptr list */
+ struct rte_mbuf **recv_buf_list;
+
+ /* Packets pending to be processed */
+ uint64_t pkts_pending;
/* Driver should read the next packet at this index */
uint32_t read_idx;
@@ -362,22 +377,17 @@ struct otx_ep_droq {
/* At this index, the driver will refill the descriptor's buffer */
uint32_t refill_idx;
- /* Packets pending to be processed */
- uint64_t pkts_pending;
+ /* The number of descriptors pending to refill. */
+ uint32_t refill_count;
/* Number of descriptors in this ring. */
uint32_t nb_desc;
- /* The number of descriptors pending to refill. */
- uint32_t refill_count;
-
uint32_t refill_threshold;
- /* The 8B aligned info ptrs begin from this address. */
- struct otx_ep_droq_info *info_list;
+ uint64_t last_pkt_count;
- /* receive buffer list contains mbuf ptr list */
- struct rte_mbuf **recv_buf_list;
+ struct rte_mempool *mpool;
/* The size of each buffer pointed by the buffer pointer. */
uint32_t buffer_size;
@@ -392,6 +402,13 @@ struct otx_ep_droq {
*/
void *pkts_sent_reg;
+ /* Pointer to host memory copy of output packet count, set by ISM */
+ uint32_t *pkts_sent_ism;
+ uint32_t pkts_sent_ism_prev;
+
+ /* Statistics for this DROQ. */
+ struct otx_ep_droq_stats stats;
+
/** Handle DMA incompletion during pkt reads.
* This variable is used to initiate a sent_reg_read
* that completes pending dma
@@ -400,8 +417,9 @@ struct otx_ep_droq {
*/
uint32_t sent_reg_val;
- /* Statistics for this DROQ. */
- struct otx_ep_droq_stats stats;
+ uint32_t q_no;
+
+ struct otx_ep_device *otx_ep_dev;
/* DMA mapped address of the DROQ descriptor ring. */
size_t desc_ring_dma;
@@ -419,10 +437,6 @@ struct otx_ep_droq {
const struct rte_memzone *desc_ring_mz;
const struct rte_memzone *info_mz;
-
- /* Pointer to host memory copy of output packet count, set by ISM */
- uint32_t *pkts_sent_ism;
- uint32_t pkts_sent_ism_prev;
};
#define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq))
@@ -545,6 +559,9 @@ struct otx_ep_device {
/* Negotiated Mbox version */
uint32_t mbox_neg_ver;
+
+ /* Generation */
+ uint32_t chip_gen;
};
int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 970372bbd7..615cbbb648 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -27,6 +27,46 @@ static const struct rte_eth_desc_lim otx_ep_tx_desc_lim = {
.nb_align = OTX_EP_TXD_ALIGN,
};
+static void
+otx_ep_set_tx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX || otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
+ if (otx_epvf->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+ eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts_mseg;
+ } else {
+ eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].tx_pkt_burst =
+ eth_dev->tx_pkt_burst;
+}
+
+static void
+otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)
+{
+ struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+
+ if (otx_epvf->chip_gen == OTX_EP_CN10XX) {
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;
+ } else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ if (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;
+ } else {
+ eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+ }
+
+ if (eth_dev->data->dev_started)
+ rte_eth_fp_ops[eth_dev->data->port_id].rx_pkt_burst =
+ eth_dev->rx_pkt_burst;
+}
+
static int
otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
struct rte_eth_dev_info *devinfo)
@@ -154,6 +194,10 @@ otx_ep_dev_start(struct rte_eth_dev *eth_dev)
}
otx_ep_dev_link_update(eth_dev, 0);
+
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
+
otx_ep_info("dev started\n");
for (q = 0; q < eth_dev->data->nb_rx_queues; q++)
@@ -266,18 +310,23 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
otx_epvf->fn_list.setup_device_regs(otx_epvf);
+ otx_epvf->eth_dev->tx_pkt_burst = &cnxk_ep_xmit_pkts;
otx_epvf->eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
+ if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF) {
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
- else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+ otx_epvf->chip_gen = OTX_EP_CN8XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
- otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
- otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN9XX;
+ } else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+ otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF) {
+ otx_epvf->eth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;
+ otx_epvf->chip_gen = OTX_EP_CN10XX;
} else {
otx_ep_err("Invalid chip_id\n");
ret = -EINVAL;
@@ -667,8 +716,8 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
/* Single process support */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
- eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
- eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
+ otx_ep_set_tx_func(eth_dev);
+ otx_ep_set_rx_func(eth_dev);
return 0;
}
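
As a usage note (illustrative, not part of this patch): the mseg
variants selected in otx_ep_set_rx_func()/otx_ep_set_tx_func() above
are picked up when an application enables the corresponding offloads
before starting the port. A hedged sketch, with port_id/nb_rxq/nb_txq
assumed to be set up elsewhere and queue setup omitted:

#include <rte_ethdev.h>

/* Hypothetical application snippet; error handling trimmed. */
struct rte_eth_conf conf = {
	.rxmode = { .offloads = RTE_ETH_RX_OFFLOAD_SCATTER },
	.txmode = { .offloads = RTE_ETH_TX_OFFLOAD_MULTI_SEGS },
};
int ret;

ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
if (ret == 0)
	ret = rte_eth_dev_start(port_id); /* burst routines are (re)assigned here */
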
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 4c509a419f..c421ef0a1c 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -13,15 +13,8 @@
#include "otx_ep_common.h"
#include "otx_ep_vf.h"
-#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"
-/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
-#define OTX_EP_INFO_SIZE 8
-#define OTX_EP_FSZ_FS0 0
-#define DROQ_REFILL_THRESHOLD 16
-#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
-
static void
otx_ep_dmazone_free(const struct rte_memzone *mz)
{
@@ -144,6 +137,13 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
iq->nb_desc);
+ iq->mbuf_list = rte_zmalloc_socket("mbuf_list", (iq->nb_desc * sizeof(struct rte_mbuf *)),
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
+ if (!iq->mbuf_list) {
+ otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
+ goto iq_init_fail;
+ }
+
iq->otx_ep_dev = otx_ep;
iq->q_no = iq_no;
iq->fill_cnt = 0;
@@ -675,85 +675,6 @@ otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
return count;
}
-/* Enqueue requests/packets to OTX_EP IQ queue.
- * returns number of requests enqueued successfully
- */
-uint16_t
-otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
- struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
- struct otx_ep_device *otx_ep = iq->otx_ep_dev;
- struct otx2_ep_instr_64B iqcmd2;
- uint32_t iqreq_type;
- struct rte_mbuf *m;
- uint32_t pkt_len;
- int count = 0;
- uint16_t i;
- int dbell;
- int index;
-
- iqcmd2.ih.u64 = 0;
- iqcmd2.irh.u64 = 0;
-
- /* ih invars */
- iqcmd2.ih.s.fsz = OTX_EP_FSZ_FS0;
- iqcmd2.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
- /* irh invars */
- iqcmd2.irh.s.opcode = OTX_EP_NW_PKT_OP;
-
- for (i = 0; i < nb_pkts; i++) {
- m = pkts[i];
- if (m->nb_segs == 1) {
- pkt_len = rte_pktmbuf_data_len(m);
- iqcmd2.ih.s.tlen = pkt_len + iqcmd2.ih.s.fsz;
- iqcmd2.dptr = rte_mbuf_data_iova(m); /*dptr*/
- iqcmd2.ih.s.gather = 0;
- iqcmd2.ih.s.gsz = 0;
- iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
- } else {
- if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
- goto xmit_fail;
-
- if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd2.dptr, &iqcmd2.ih) < 0))
- goto xmit_fail;
-
- pkt_len = rte_pktmbuf_pkt_len(m);
- iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
- }
-
- iqcmd2.irh.u64 = rte_bswap64(iqcmd2.irh.u64);
-
-#ifdef OTX_EP_IO_DEBUG
- otx_ep_dbg("After swapping\n");
- otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
- (unsigned long)iqcmd.dptr);
- otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
- otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
- (unsigned long)iqcmd.pki_ih3);
- otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
- (unsigned long)iqcmd.rptr);
- otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
- otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
- (unsigned long)iqcmd.exhdr[0]);
-#endif
- index = iq->host_write_index;
- dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
- if (otx_ep_send_data(otx_ep, iq, &iqcmd2, dbell))
- goto xmit_fail;
- otx_ep_iqreq_add(iq, m, iqreq_type, index);
- iq->stats.tx_pkts++;
- iq->stats.tx_bytes += pkt_len;
- count++;
- }
-
-xmit_fail:
- if (iq->instr_pending >= OTX_EP_MAX_INSTR)
- otx_ep_flush_iq(iq);
-
- /* Return no# of instructions posted successfully. */
- return count;
-}
-
static uint32_t
otx_ep_droq_refill(struct otx_ep_droq *droq)
{
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 3f12527004..cb68ef3b41 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,29 +7,53 @@
#include <rte_byteorder.h>
-#define OTX_EP_RXD_ALIGN 2
-#define OTX_EP_TXD_ALIGN 2
+#define OTX_EP_RXD_ALIGN 8
+#define OTX_EP_TXD_ALIGN 8
#define OTX_EP_IQ_SEND_FAILED (-1)
#define OTX_EP_IQ_SEND_SUCCESS (0)
-#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10000
+#define OTX_EP_MAX_DELAYED_PKT_RETRIES 10
#define OTX_EP_FSZ 28
#define OTX2_EP_FSZ 24
-#define OTX_EP_MAX_INSTR 16
+#define OTX_EP_MAX_INSTR 128
+
+/* SDP_LENGTH_S specifies packet length and is of 8-byte size */
+#define OTX_EP_INFO_SIZE 8
+#define DROQ_REFILL_THRESHOLD 16
+#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63)
static inline uint32_t
otx_ep_incr_index(uint32_t index, uint32_t count, uint32_t max)
{
return ((index + count) & (max - 1));
}
+
uint16_t
otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
uint16_t
otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts);
+
+uint16_t
+cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
+uint16_t
+cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
+
uint16_t
-otx_ep_recv_pkts(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t budget);
+cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
#endif /* _OTX_EP_RXTX_H_ */
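
For completeness, a hedged sketch of how these burst prototypes are
exercised from an application's poll loop via the generic ethdev API;
none of this is part of the patch, and port_id is assumed to be an
already started octeon_ep port:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Hypothetical forwarding loop body; the installed burst functions end
 * up being cnxk_ep_recv_pkts()/cnxk_ep_xmit_pkts() or their _mseg
 * variants, depending on chip generation and offload flags.
 */
struct rte_mbuf *pkts[32];
uint16_t nb_rx, nb_tx;

nb_rx = rte_eth_rx_burst(port_id, 0, pkts, RTE_DIM(pkts));
if (nb_rx != 0) {
	nb_tx = rte_eth_tx_burst(port_id, 0, pkts, nb_rx);
	if (nb_tx < nb_rx)
		rte_pktmbuf_free_bulk(&pkts[nb_tx], nb_rx - nb_tx);
}
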
--
2.25.1
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v6 3/3] net/octeon_ep: add new fastpath routines
2023-10-18 11:14 ` [PATCH v6 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
@ 2023-10-19 3:03 ` Jerin Jacob
0 siblings, 0 replies; 26+ messages in thread
From: Jerin Jacob @ 2023-10-19 3:03 UTC (permalink / raw)
To: Vamsi Attunuru; +Cc: dev, jerinj, sthotton
On Thu, Oct 19, 2023 at 6:52 AM Vamsi Attunuru <vattunuru@marvell.com> wrote:
>
> Adds new fastpath routines for cn10k & cn9k endpoint
> devices and assigns the fastpath routines based on
> the offload flags.
>
> Patch also adds misc changes to improve performance
> and code-readability.
>
> Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Series applied to dpdk-next-net-mrvl/for-next-net. Thanks
^ permalink raw reply [flat|nested] 26+ messages in thread
end of thread
Thread overview: 26+ messages
2023-10-11 1:50 [PATCH 0/3] rewrite fastpath routines Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-11 1:50 ` [PATCH 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 0/3] rewrite " Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-11 8:36 ` [PATCH v2 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 0/3] rewrite " Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-11 12:53 ` [PATCH v3 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 0/3] rewrite " Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-12 6:23 ` [PATCH v4 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-18 3:48 ` Jerin Jacob
2023-10-18 8:07 ` [PATCH v5 0/3] rewrite " Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-18 8:07 ` [PATCH v5 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 0/3] rewrite " Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 2/3] net/octeon_ep: clean up receive routine Vamsi Attunuru
2023-10-18 11:14 ` [PATCH v6 3/3] net/octeon_ep: add new fastpath routines Vamsi Attunuru
2023-10-19 3:03 ` Jerin Jacob