Add barrier after meta batch free in scalar routine when LMT lines are
exactly full, to make sure that the next LMT line user in Tx starts
writing the lines only when the previous steorl stores are complete.

Fixes: 4382a7ccf781 ("net/cnxk: support Rx security offload on cn10k")
Cc: stable@dpdk.org

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 drivers/net/cnxk/cn10k_rx.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index e4f5a55..94c1f1e 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -1007,10 +1007,11 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
 	plt_write64((wdata | nb_pkts), rxq->cq_door);
 
 	/* Free remaining meta buffers if any */
-	if (flags & NIX_RX_OFFLOAD_SECURITY_F && loff) {
+	if (flags & NIX_RX_OFFLOAD_SECURITY_F && loff)
 		nix_sec_flush_meta(laddr, lmt_id + lnum, loff, aura_handle);
-		plt_io_wmb();
-	}
+
+	if (flags & NIX_RX_OFFLOAD_SECURITY_F)
+		rte_io_wmb();
 
 	return nb_pkts;
 }
--
2.8.4
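The reasoning above is about store ordering rather than the flush itself, so a minimal standalone sketch may help; it is not the driver code — lmt_line, free_meta_batch and rx_tail are hypothetical stand-ins — and only rte_io_wmb() is the actual DPDK barrier used by the fix.

#include <stdint.h>
#include <rte_atomic.h> /* rte_io_wmb() */

#define LMT_LINE_WORDS 16

static uint64_t lmt_line[LMT_LINE_WORDS]; /* stand-in for one shared LMT line */

/* Fill the LMT line with batch-free descriptors (hypothetical helper). */
static void
free_meta_batch(uint64_t aura_handle, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n && i < LMT_LINE_WORDS; i++)
		lmt_line[i] = aura_handle | i;
}

/* Tail of a scalar Rx burst, mirroring the fixed control flow above. */
static void
rx_tail(int security_enabled, unsigned int loff, uint64_t aura_handle)
{
	if (security_enabled && loff)
		free_meta_batch(aura_handle, loff);

	/*
	 * Barrier whenever security offload is enabled, even when the lines
	 * were exactly full (loff == 0), so a later Tx user of the same LMT
	 * lines starts writing only after these stores complete.
	 */
	if (security_enabled)
		rte_io_wmb();
}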
For transport mode, the roundup needs to be based on the L4 data and
should not include the L3 length. By including the L3 length, the rlen
that is calculated and placed in the send header would exceed the final
length of the packet in some scenarios where padding is necessary.

Also, when both outer and inner checksum offload flags are enabled, get
l2_len and l3_len from il3ptr and il4ptr.

Fixes: 55bfac717c72 ("net/cnxk: support Tx security offload on cn10k")
Cc: stable@dpdk.org

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 drivers/net/cnxk/cn10k_tx.h | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 981bc9b..c25825c 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -248,23 +248,29 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
 	uint32_t pkt_len, dlen_adj, rlen;
 	uint8_t l3l4type, chksum;
 	uint64x2_t cmd01, cmd23;
+	uint8_t l2_len, l3_len;
 	uintptr_t dptr, nixtx;
 	uint64_t ucode_cmd[4];
 	uint64_t *laddr;
-	uint8_t l2_len;
 	uint16_t tag;
 	uint64_t sa;
 
 	sess_priv.u64 = *rte_security_dynfield(m);
 
 	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
-		l2_len = vgetq_lane_u8(*cmd0, 8);
 		/* Extract l3l4type either from il3il4type or ol3ol4type */
 		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
-		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)
+		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
+			l2_len = vgetq_lane_u8(*cmd0, 10);
+			/* L4 ptr from send hdr includes l2 and l3 len */
+			l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
 			l3l4type = vgetq_lane_u8(*cmd0, 13);
-		else
+		} else {
+			l2_len = vgetq_lane_u8(*cmd0, 8);
+			/* L4 ptr from send hdr includes l2 and l3 len */
+			l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
 			l3l4type = vgetq_lane_u8(*cmd0, 12);
+		}
 
 		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
 		chksum = ~chksum;
@@ -273,6 +279,7 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
 		*cmd0 = vsetq_lane_u16(0, *cmd0, 6);
 	} else {
 		l2_len = m->l2_len;
+		l3_len = m->l3_len;
 	}
 
 	/* Retrieve DPTR */
@@ -281,6 +288,8 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
 
 	/* Calculate dlen adj */
 	dlen_adj = pkt_len - l2_len;
+	/* Exclude l3 len from roundup for transport mode */
+	dlen_adj -= sess_priv.mode ? 0 : l3_len;
 	rlen = (dlen_adj + sess_priv.roundup_len) + (sess_priv.roundup_byte - 1);
 	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
 
@@ -360,10 +369,10 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 	uint8_t l3l4type, chksum;
 	uint64x2_t cmd01, cmd23;
 	union nix_send_sg_s *sg;
+	uint8_t l2_len, l3_len;
 	uintptr_t dptr, nixtx;
 	uint64_t ucode_cmd[4];
 	uint64_t *laddr;
-	uint8_t l2_len;
 	uint16_t tag;
 	uint64_t sa;
 
@@ -376,13 +385,19 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 	sg = (union nix_send_sg_s *)&cmd[2];
 
 	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
-		l2_len = cmd[1] & 0xFF;
 		/* Extract l3l4type either from il3il4type or ol3ol4type */
 		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
-		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)
+		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
+			l2_len = (cmd[1] >> 16) & 0xFF;
+			/* L4 ptr from send hdr includes l2 and l3 len */
+			l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
 			l3l4type = (cmd[1] >> 40) & 0xFF;
-		else
+		} else {
+			l2_len = cmd[1] & 0xFF;
+			/* L4 ptr from send hdr includes l2 and l3 len */
+			l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
 			l3l4type = (cmd[1] >> 32) & 0xFF;
+		}
 
 		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
 		chksum = ~chksum;
@@ -391,6 +406,7 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 		cmd[1] &= ~(0xFFFFUL << 32);
 	} else {
 		l2_len = m->l2_len;
+		l3_len = m->l3_len;
 	}
 
 	/* Retrieve DPTR */
@@ -399,6 +415,8 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 
 	/* Calculate dlen adj */
 	dlen_adj = pkt_len - l2_len;
+	/* Exclude l3 len from roundup for transport mode */
+	dlen_adj -= sess_priv.mode ? 0 : l3_len;
 	rlen = (dlen_adj + sess_priv.roundup_len) + (sess_priv.roundup_byte - 1);
 	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
 
--
2.8.4
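As a worked illustration of the roundup change, the standalone sketch below recomputes rlen with made-up lengths; the exact semantics of roundup_len and roundup_byte are assumed here, and the diff's own comment implies sess_priv.mode == 0 means transport mode.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example packet: 14B L2 + 20B L3 + 40B L4 data (values are made up). */
	uint32_t pkt_len = 74;
	uint8_t l2_len = 14, l3_len = 20;
	uint8_t transport_mode = 1;  /* mirrors sess_priv.mode == 0 in the diff */
	uint8_t roundup_byte = 16;   /* padding granularity from the session */
	uint8_t roundup_len = 2;     /* fixed bytes added before rounding (assumed) */

	uint32_t dlen_adj = pkt_len - l2_len;
	if (transport_mode)
		dlen_adj -= l3_len;  /* exclude L3 so only the L4 data is rounded */

	uint32_t rlen = (dlen_adj + roundup_len) + (roundup_byte - 1);
	rlen &= ~(uint32_t)(roundup_byte - 1);

	/* With l3_len included, dlen_adj would be 60 and rlen 64;
	 * excluding it gives dlen_adj 40 and rlen 48. */
	printf("dlen_adj=%u rlen=%u\n", dlen_adj, rlen);
	return 0;
}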
Fix multi-seg extraction in vwqe path to avoid updating mbuf[] array
until it is used via cq0 path.

Fixes: 7fbbc981d54f ("event/cnxk: support vectorized Rx event fast path")
Cc: pbhagavatula@marvell.com
Cc: stable@dpdk.org

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Acked-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/net/cnxk/cn10k_rx.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 00bec01..5ecb20f 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -1673,10 +1673,6 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
 		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
 		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
 
-		/* Store the mbufs to rx_pkts */
-		vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
-		vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
-
 		if (flags & NIX_RX_MULTI_SEG_F) {
 			/* Multi segment is enable build mseg list for
 			 * individual mbufs in scalar mode.
@@ -1695,6 +1691,10 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
 					    mbuf3, mbuf_initializer, flags);
 		}
 
+		/* Store the mbufs to rx_pkts */
+		vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+		vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
+
 		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
 		RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1);
 		RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1);
--
2.8.4
Fix the hotplug detach sequence to handle the case where the first PCI
device, which is hosting the NPA LF, is being destroyed while still in
use.

Fixes: 5a4341c84979 ("net/cnxk: add platform specific probe and remove")
Cc: stable@dpdk.org

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 drivers/net/cnxk/cn10k_ethdev.c | 6 +++++-
 drivers/net/cnxk/cn9k_ethdev.c  | 6 +++++-
 drivers/net/cnxk/cnxk_ethdev.c  | 8 ++++----
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index bc9e10f..96eeae4 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -778,8 +778,12 @@ cn10k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
 	/* Find eth dev allocated */
 	eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
-	if (!eth_dev)
+	if (!eth_dev) {
+		/* Ignore if ethdev is in mid of detach state in secondary */
+		if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+			return 0;
 		return -ENOENT;
+	}
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* Setup callbacks for secondary process */
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index de33fa7..b46f5da 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -708,8 +708,12 @@ cn9k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
 	/* Find eth dev allocated */
 	eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
-	if (!eth_dev)
+	if (!eth_dev) {
+		/* Ignore if ethdev is in mid of detach state in secondary */
+		if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+			return 0;
 		return -ENOENT;
+	}
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* Setup callbacks for secondary process */
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 12ff30f..3912c24 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -1781,9 +1781,6 @@ cnxk_eth_dev_uninit(struct rte_eth_dev *eth_dev, bool reset)
 	struct rte_eth_fc_conf fc_conf;
 	int rc, i;
 
-	/* Disable switch hdr pkind */
-	roc_nix_switch_hdr_set(&dev->nix, 0, 0, 0, 0);
-
 	plt_free(eth_dev->security_ctx);
 	eth_dev->security_ctx = NULL;
 
@@ -1791,6 +1788,9 @@ cnxk_eth_dev_uninit(struct rte_eth_dev *eth_dev, bool reset)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	/* Disable switch hdr pkind */
+	roc_nix_switch_hdr_set(&dev->nix, 0, 0, 0, 0);
+
 	/* Clear the flag since we are closing down */
 	dev->configured = 0;
 
@@ -1927,7 +1927,7 @@ cnxk_nix_remove(struct rte_pci_device *pci_dev)
 
 	/* Check if this device is hosting common resource */
 	nix = roc_idev_npa_nix_get();
-	if (nix->pci_dev != pci_dev)
+	if (!nix || nix->pci_dev != pci_dev)
 		return 0;
 
 	/* Try nix fini now */
--
2.8.4