From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id CBB8FA0547; Tue, 26 Oct 2021 16:32:00 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 393DB4111D; Tue, 26 Oct 2021 16:31:53 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by mails.dpdk.org (Postfix) with ESMTP id 82416410FE for ; Tue, 26 Oct 2021 16:31:49 +0200 (CEST) X-IronPort-AV: E=McAfee;i="6200,9189,10149"; a="253461802" X-IronPort-AV: E=Sophos;i="5.87,184,1631602800"; d="scan'208";a="253461802" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Oct 2021 07:10:50 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,184,1631602800"; d="scan'208";a="722385809" Received: from silpixa00400884.ir.intel.com ([10.243.22.82]) by fmsmga006.fm.intel.com with ESMTP; 26 Oct 2021 07:10:48 -0700 From: Radu Nicolau To: Jingjing Wu , Beilei Xing , Bruce Richardson , Konstantin Ananyev Cc: dev@dpdk.org, declan.doherty@intel.com, abhijit.sinha@intel.com, qi.z.zhang@intel.com, Radu Nicolau Date: Tue, 26 Oct 2021 14:56:52 +0100 Message-Id: <20211026135657.2034763-3-radu.nicolau@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211026135657.2034763-1-radu.nicolau@intel.com> References: <20210909142428.750634-1-radu.nicolau@intel.com> <20211026135657.2034763-1-radu.nicolau@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v12 2/7] net/iavf: rework tx path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Rework the TX path and TX descriptor usage in order to allow for better use of offload flags and to facilitate enabling of inline crypto 
offload feature. Signed-off-by: Declan Doherty Signed-off-by: Abhijit Sinha Signed-off-by: Radu Nicolau Acked-by: Jingjing Wu --- drivers/net/iavf/iavf_rxtx.c | 538 ++++++++++++++++----------- drivers/net/iavf/iavf_rxtx.h | 117 +++++- drivers/net/iavf/iavf_rxtx_vec_sse.c | 10 +- 3 files changed, 431 insertions(+), 234 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index 52d919ca1b..128691aaf1 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -1054,27 +1054,31 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp) static inline void iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb, - volatile union iavf_rx_flex_desc *rxdp, - uint8_t rx_flags) + volatile union iavf_rx_flex_desc *rxdp) { - uint16_t vlan_tci = 0; - - if (rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1 && - rte_le_to_cpu_64(rxdp->wb.status_error0) & - (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) - vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1); + if (rte_le_to_cpu_64(rxdp->wb.status_error0) & + (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) { + mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; + mb->vlan_tci = + rte_le_to_cpu_16(rxdp->wb.l2tag1); + } else { + mb->vlan_tci = 0; + } #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC - if (rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2 && - rte_le_to_cpu_16(rxdp->wb.status_error1) & - (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) - vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); -#endif - - if (vlan_tci) { - mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; - mb->vlan_tci = vlan_tci; + if (rte_le_to_cpu_16(rxdp->wb.status_error1) & + (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) { + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ | + RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN; + mb->vlan_tci_outer = mb->vlan_tci; + mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd); + PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u", + 
rte_le_to_cpu_16(rxdp->wb.l2tag2_1st), + rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd)); + } else { + mb->vlan_tci_outer = 0; } +#endif } /* Translate the rx descriptor status and error fields to pkt flags */ @@ -1394,7 +1398,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue, rxm->ol_flags = 0; rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; - iavf_flex_rxd_to_vlan_tci(rxm, &rxd, rxq->rx_flags); + iavf_flex_rxd_to_vlan_tci(rxm, &rxd); rxq->rxd_to_pkt_fields(rxq, rxm, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); rxm->ol_flags |= pkt_flags; @@ -1536,7 +1540,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, first_seg->ol_flags = 0; first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; - iavf_flex_rxd_to_vlan_tci(first_seg, &rxd, rxq->rx_flags); + iavf_flex_rxd_to_vlan_tci(first_seg, &rxd); rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); @@ -1774,7 +1778,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq) mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)]; - iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j], rxq->rx_flags); + iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]); rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]); stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); @@ -2068,190 +2072,302 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq) return 0; } -/* Check if the context descriptor is needed for TX offloading */ + + +static inline void +iavf_fill_ctx_desc_cmd_field(volatile uint64_t *field, struct rte_mbuf *m) +{ + uint64_t cmd = 0; + + /* TSO enabled */ + if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) + cmd = IAVF_TX_CTX_DESC_TSO << IAVF_TXD_DATA_QW1_CMD_SHIFT; + + /* Time Sync - Currently not supported */ + + /* Outer L2 TAG 2 Insertion - 
Currently not supported */ + /* Inner L2 TAG 2 Insertion - Currently not supported */ + + *field |= cmd; +} + +static inline void +iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0, + const struct rte_mbuf *m) +{ + uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE; + uint64_t eip_len = 0; + uint64_t eip_noinc = 0; + /* Default - IP_ID is increment in each segment of LSO */ + + switch (m->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6 | + RTE_MBUF_F_TX_OUTER_IP_CKSUM)) { + case RTE_MBUF_F_TX_OUTER_IPV4: + eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD; + eip_len = m->outer_l3_len >> 2; + break; + case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM: + eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD; + eip_len = m->outer_l3_len >> 2; + break; + case RTE_MBUF_F_TX_OUTER_IPV6: + eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6; + eip_len = m->outer_l3_len >> 2; + break; + } + + *qw0 = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT | + eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT | + eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT; +} + static inline uint16_t -iavf_calc_context_desc(uint64_t flags, uint8_t vlan_flag) +iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field, + struct rte_mbuf *m) { - if (flags & RTE_MBUF_F_TX_TCP_SEG) - return 1; - if (flags & RTE_MBUF_F_TX_VLAN && - vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) - return 1; - return 0; + uint64_t segmentation_field = 0; + uint64_t total_length = 0; + + total_length = m->pkt_len - (m->l2_len + m->l3_len + m->l4_len); + + if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) + total_length -= m->outer_l3_len; + +#ifdef RTE_LIBRTE_IAVF_DEBUG_TX + if (!m->l4_len || !m->tso_segsz) + PMD_TX_LOG(DEBUG, "L4 length %d, LSO Segment size %d", + m->l4_len, m->tso_segsz); + if (m->tso_segsz < 88) + PMD_TX_LOG(DEBUG, "LSO Segment size %d is less than minimum %d", + m->tso_segsz, 88); +#endif + segmentation_field = + (((uint64_t)total_length << 
IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) & + IAVF_TXD_CTX_QW1_TSO_LEN_MASK) | + (((uint64_t)m->tso_segsz << IAVF_TXD_CTX_QW1_MSS_SHIFT) & + IAVF_TXD_CTX_QW1_MSS_MASK); + + *field |= segmentation_field; + + return total_length; } + +struct iavf_tx_context_desc_qws { + __le64 qw0; + __le64 qw1; +}; + static inline void -iavf_txd_enable_checksum(uint64_t ol_flags, - uint32_t *td_cmd, - uint32_t *td_offset, - union iavf_tx_offload tx_offload) +iavf_fill_context_desc(volatile struct iavf_tx_context_desc *desc, + struct rte_mbuf *m, uint16_t *tlen) { + volatile struct iavf_tx_context_desc_qws *desc_qws = + (volatile struct iavf_tx_context_desc_qws *)desc; + /* fill descriptor type field */ + desc_qws->qw1 = IAVF_TX_DESC_DTYPE_CONTEXT; + + /* fill command field */ + iavf_fill_ctx_desc_cmd_field(&desc_qws->qw1, m); + + /* fill segmentation field */ + if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) { + *tlen = iavf_fill_ctx_desc_segmentation_field(&desc_qws->qw1, + m); + } + + /* fill tunnelling field */ + if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) + iavf_fill_ctx_desc_tunnelling_field(&desc_qws->qw0, m); + else + desc_qws->qw0 = 0; + + desc_qws->qw0 = rte_cpu_to_le_64(desc_qws->qw0); + desc_qws->qw1 = rte_cpu_to_le_64(desc_qws->qw1); +} + + +static inline void +iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1, + struct rte_mbuf *m) +{ + uint64_t command = 0; + uint64_t offset = 0; + uint64_t l2tag1 = 0; + + *qw1 = IAVF_TX_DESC_DTYPE_DATA; + + command = (uint64_t)IAVF_TX_DESC_CMD_ICRC; + + /* Descriptor based VLAN insertion */ + if (m->ol_flags & RTE_MBUF_F_TX_VLAN) { + command |= (uint64_t)IAVF_TX_DESC_CMD_IL2TAG1; + l2tag1 |= m->vlan_tci; + } + /* Set MACLEN */ - *td_offset |= (tx_offload.l2_len >> 1) << - IAVF_TX_DESC_LENGTH_MACLEN_SHIFT; - - /* Enable L3 checksum offloads */ - if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM; - *td_offset |= (tx_offload.l3_len >> 2) << - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; - } 
else if (ol_flags & RTE_MBUF_F_TX_IPV4) { - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4; - *td_offset |= (tx_offload.l3_len >> 2) << - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; - } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6; - *td_offset |= (tx_offload.l3_len >> 2) << - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; - } - - if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (tx_offload.l4_len >> 2) << + offset |= (m->l2_len >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* Enable L3 checksum offloading inner */ + if (m->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4)) { + command |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM; + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; + } else if (m->ol_flags & RTE_MBUF_F_TX_IPV4) { + command |= IAVF_TX_DESC_CMD_IIPT_IPV4; + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; + } else if (m->ol_flags & RTE_MBUF_F_TX_IPV6) { + command |= IAVF_TX_DESC_CMD_IIPT_IPV6; + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; + } + + if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { + command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= (m->l4_len >> 2) << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; - return; } /* Enable L4 checksum offloads */ - switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) { + switch (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) { case RTE_MBUF_F_TX_TCP_CKSUM: - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) << - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= (sizeof(struct rte_tcp_hdr) >> 2) << + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case RTE_MBUF_F_TX_SCTP_CKSUM: - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) << - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + command |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct rte_sctp_hdr) >> 2) << + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case 
RTE_MBUF_F_TX_UDP_CKSUM: - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) << - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; - break; - default: + command |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct rte_udp_hdr) >> 2) << + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; } + + *qw1 = rte_cpu_to_le_64((((uint64_t)command << + IAVF_TXD_DATA_QW1_CMD_SHIFT) & IAVF_TXD_DATA_QW1_CMD_MASK) | + (((uint64_t)offset << IAVF_TXD_DATA_QW1_OFFSET_SHIFT) & + IAVF_TXD_DATA_QW1_OFFSET_MASK) | + ((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT)); } -/* set TSO context descriptor - * support IP -> L4 and IP -> IP -> L4 - */ -static inline uint64_t -iavf_set_tso_ctx(struct rte_mbuf *mbuf, union iavf_tx_offload tx_offload) +static inline void +iavf_fill_data_desc_buffer_sz_field(volatile uint64_t *field, uint16_t value) { - uint64_t ctx_desc = 0; - uint32_t cd_cmd, hdr_len, cd_tso_len; - - if (!tx_offload.l4_len) { - PMD_TX_LOG(DEBUG, "L4 length set to 0"); - return ctx_desc; + *field |= (((uint64_t)value << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) & + IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK); } - hdr_len = tx_offload.l2_len + - tx_offload.l3_len + - tx_offload.l4_len; +static inline void +iavf_fill_data_desc(volatile struct iavf_tx_desc *desc, + struct rte_mbuf *m, uint64_t desc_template, + uint16_t tlen, uint16_t ipseclen) +{ + uint32_t hdrlen = m->l2_len; + uint32_t bufsz = 0; - cd_cmd = IAVF_TX_CTX_DESC_TSO; - cd_tso_len = mbuf->pkt_len - hdr_len; - ctx_desc |= ((uint64_t)cd_cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) | - ((uint64_t)cd_tso_len << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((uint64_t)mbuf->tso_segsz << IAVF_TXD_CTX_QW1_MSS_SHIFT); + /* fill data descriptor qw1 from template */ + desc->cmd_type_offset_bsz = desc_template; - return ctx_desc; -} + /* set data buffer address */ + desc->buffer_addr = rte_mbuf_data_iova(m); -/* Construct the tx flags */ -static inline uint64_t -iavf_build_ctob(uint32_t td_cmd, uint32_t td_offset, unsigned int size, - 
uint32_t td_tag) -{ - return rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DATA | - ((uint64_t)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) | - ((uint64_t)td_offset << - IAVF_TXD_QW1_OFFSET_SHIFT) | - ((uint64_t)size << - IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | - ((uint64_t)td_tag << - IAVF_TXD_QW1_L2TAG1_SHIFT)); + /* calculate data buffer size less set header lengths */ + if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) && + (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))) { + hdrlen += m->outer_l3_len; + if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) + hdrlen += m->l3_len + m->l4_len; + else + hdrlen += m->l3_len; + if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) + hdrlen += ipseclen; + bufsz = hdrlen + tlen; + } else { + bufsz = m->data_len; + } + + /* set data buffer size */ + desc->cmd_type_offset_bsz |= + (((uint64_t)bufsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) & + IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK); + + desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr); + desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc->cmd_type_offset_bsz); } + /* TX function */ uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { - volatile struct iavf_tx_desc *txd; - volatile struct iavf_tx_desc *txr; - struct iavf_tx_queue *txq; - struct iavf_tx_entry *sw_ring; + struct iavf_tx_queue *txq = tx_queue; + volatile struct iavf_tx_desc *txr = txq->tx_ring; + struct iavf_tx_entry *txe_ring = txq->sw_ring; struct iavf_tx_entry *txe, *txn; - struct rte_mbuf *tx_pkt; - struct rte_mbuf *m_seg; - uint16_t tx_id; - uint16_t nb_tx; - uint32_t td_cmd; - uint32_t td_offset; - uint32_t td_tag; - uint64_t ol_flags; - uint16_t nb_used; - uint16_t nb_ctx; - uint16_t tx_last; - uint16_t slen; - uint64_t buf_dma_addr; - uint16_t cd_l2tag2 = 0; - union iavf_tx_offload tx_offload = {0}; - - txq = tx_queue; - sw_ring = txq->sw_ring; - txr = txq->tx_ring; - tx_id = txq->tx_tail; - txe = &sw_ring[tx_id]; + struct rte_mbuf *mb, *mb_seg; + uint16_t desc_idx, desc_idx_last; + uint16_t idx; + /* Check 
if the descriptor ring needs to be cleaned. */ if (txq->nb_free < txq->free_thresh) - (void)iavf_xmit_cleanup(txq); + iavf_xmit_cleanup(txq); + + desc_idx = txq->tx_tail; + txe = &txe_ring[desc_idx]; + +#ifdef RTE_LIBRTE_IAVF_DEBUG_TX_DESC_RING + iavf_dump_tx_entry_ring(txq); + iavf_dump_tx_desc_ring(txq); +#endif + - for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { - td_cmd = 0; - td_tag = 0; - td_offset = 0; + for (idx = 0; idx < nb_pkts; idx++) { + volatile struct iavf_tx_desc *ddesc; + uint16_t nb_desc_ctx; + uint16_t nb_desc_data, nb_desc_required; + uint16_t tlen = 0, ipseclen = 0; + uint64_t ddesc_template = 0; + uint64_t ddesc_cmd = 0; + + mb = tx_pkts[idx]; - tx_pkt = *tx_pkts++; RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); - ol_flags = tx_pkt->ol_flags; - tx_offload.l2_len = tx_pkt->l2_len; - tx_offload.l3_len = tx_pkt->l3_len; - tx_offload.l4_len = tx_pkt->l4_len; - tx_offload.tso_segsz = tx_pkt->tso_segsz; - /* Calculate the number of context descriptors needed. */ - nb_ctx = iavf_calc_context_desc(ol_flags, txq->vlan_flag); + nb_desc_data = mb->nb_segs; + nb_desc_ctx = !!(mb->ol_flags & + (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG | RTE_MBUF_F_TX_TUNNEL_MASK)); - /* The number of descriptors that must be allocated for + /** + * The number of descriptors that must be allocated for * a packet equals to the number of the segments of that - * packet plus 1 context descriptor if needed. + * packet plus the context and ipsec descriptors if needed. 
*/ - nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); - tx_last = (uint16_t)(tx_id + nb_used - 1); + nb_desc_required = nb_desc_data + nb_desc_ctx; + + desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1); - /* Circular ring */ - if (tx_last >= txq->nb_tx_desc) - tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); + /* wrap descriptor ring */ + if (desc_idx_last >= txq->nb_tx_desc) + desc_idx_last = + (uint16_t)(desc_idx_last - txq->nb_tx_desc); - PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u" - " tx_first=%u tx_last=%u", - txq->port_id, txq->queue_id, tx_id, tx_last); + PMD_TX_LOG(DEBUG, + "port_id=%u queue_id=%u tx_first=%u tx_last=%u", + txq->port_id, txq->queue_id, desc_idx, desc_idx_last); - if (nb_used > txq->nb_free) { + if (nb_desc_required > txq->nb_free) { if (iavf_xmit_cleanup(txq)) { - if (nb_tx == 0) + if (idx == 0) return 0; goto end_of_tx; } - if (unlikely(nb_used > txq->rs_thresh)) { - while (nb_used > txq->nb_free) { + if (unlikely(nb_desc_required > txq->rs_thresh)) { + while (nb_desc_required > txq->nb_free) { if (iavf_xmit_cleanup(txq)) { - if (nb_tx == 0) + if (idx == 0) return 0; goto end_of_tx; } @@ -2259,122 +2375,94 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } } - /* Descriptor based VLAN insertion */ - if (ol_flags & RTE_MBUF_F_TX_VLAN && - txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) { - td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1; - td_tag = tx_pkt->vlan_tci; - } - - /* According to datasheet, the bit2 is reserved and must be - * set to 1. 
- */ - td_cmd |= 0x04; - - /* Enable checksum offloading */ - if (ol_flags & IAVF_TX_CKSUM_OFFLOAD_MASK) - iavf_txd_enable_checksum(ol_flags, &td_cmd, - &td_offset, tx_offload); + iavf_build_data_desc_cmd_offset_fields(&ddesc_template, mb); - if (nb_ctx) { /* Setup TX context descriptor if required */ - uint64_t cd_type_cmd_tso_mss = - IAVF_TX_DESC_DTYPE_CONTEXT; - volatile struct iavf_tx_context_desc *ctx_txd = + if (nb_desc_ctx) { + volatile struct iavf_tx_context_desc *ctx_desc = (volatile struct iavf_tx_context_desc *) - &txr[tx_id]; + &txr[desc_idx]; /* clear QW0 or the previous writeback value * may impact next write */ - *(volatile uint64_t *)ctx_txd = 0; + *(volatile uint64_t *)ctx_desc = 0; - txn = &sw_ring[txe->next_id]; + txn = &txe_ring[txe->next_id]; RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + if (txe->mbuf) { rte_pktmbuf_free_seg(txe->mbuf); txe->mbuf = NULL; } - /* TSO enabled */ - if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) - cd_type_cmd_tso_mss |= - iavf_set_tso_ctx(tx_pkt, tx_offload); + iavf_fill_context_desc(ctx_desc, mb, &tlen); + IAVF_DUMP_TX_DESC(txq, ctx_desc, desc_idx); - if (ol_flags & RTE_MBUF_F_TX_VLAN && - txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) { - cd_type_cmd_tso_mss |= IAVF_TX_CTX_DESC_IL2TAG2 - << IAVF_TXD_CTX_QW1_CMD_SHIFT; - cd_l2tag2 = tx_pkt->vlan_tci; + txe->last_id = desc_idx_last; + desc_idx = txe->next_id; + txe = txn; } - ctx_txd->type_cmd_tso_mss = - rte_cpu_to_le_64(cd_type_cmd_tso_mss); - ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2); - IAVF_DUMP_TX_DESC(txq, &txr[tx_id], tx_id); - txe->last_id = tx_last; - tx_id = txe->next_id; - txe = txn; - } - m_seg = tx_pkt; + mb_seg = mb; + do { - txd = &txr[tx_id]; - txn = &sw_ring[txe->next_id]; + ddesc = (volatile struct iavf_tx_desc *) + &txr[desc_idx]; + + txn = &txe_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); if (txe->mbuf) rte_pktmbuf_free_seg(txe->mbuf); - txe->mbuf = m_seg; - - /* Setup TX Descriptor */ - slen = m_seg->data_len; - buf_dma_addr = 
rte_mbuf_data_iova(m_seg); - txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr); - txd->cmd_type_offset_bsz = iavf_build_ctob(td_cmd, - td_offset, - slen, - td_tag); - - IAVF_DUMP_TX_DESC(txq, txd, tx_id); - txe->last_id = tx_last; - tx_id = txe->next_id; + + txe->mbuf = mb_seg; + iavf_fill_data_desc(ddesc, mb_seg, + ddesc_template, tlen, ipseclen); + + IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx); + + txe->last_id = desc_idx_last; + desc_idx = txe->next_id; txe = txn; - m_seg = m_seg->next; - } while (m_seg); + mb_seg = mb_seg->next; + } while (mb_seg); /* The last packet data descriptor needs End Of Packet (EOP) */ - td_cmd |= IAVF_TX_DESC_CMD_EOP; - txq->nb_used = (uint16_t)(txq->nb_used + nb_used); - txq->nb_free = (uint16_t)(txq->nb_free - nb_used); + ddesc_cmd = IAVF_TX_DESC_CMD_EOP; + + txq->nb_used = (uint16_t)(txq->nb_used + nb_desc_required); + txq->nb_free = (uint16_t)(txq->nb_free - nb_desc_required); if (txq->nb_used >= txq->rs_thresh) { PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id=" "%4u (port=%d queue=%d)", - tx_last, txq->port_id, txq->queue_id); + desc_idx_last, txq->port_id, txq->queue_id); - td_cmd |= IAVF_TX_DESC_CMD_RS; + ddesc_cmd |= IAVF_TX_DESC_CMD_RS; /* Update txq RS bit counters */ txq->nb_used = 0; } - txd->cmd_type_offset_bsz |= - rte_cpu_to_le_64(((uint64_t)td_cmd) << - IAVF_TXD_QW1_CMD_SHIFT); - IAVF_DUMP_TX_DESC(txq, txd, tx_id); + ddesc->cmd_type_offset_bsz |= rte_cpu_to_le_64(ddesc_cmd << + IAVF_TXD_DATA_QW1_CMD_SHIFT); + + IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx - 1); } end_of_tx: rte_wmb(); PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", - txq->port_id, txq->queue_id, tx_id, nb_tx); + txq->port_id, txq->queue_id, desc_idx, idx); - IAVF_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id); - txq->tx_tail = tx_id; + IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, desc_idx); + txq->tx_tail = desc_idx; - return nb_tx; + return idx; } /* Check if the packet with vlan user priority is transmitted in the diff --git 
a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index 84351011f1..1da1278452 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -403,6 +403,112 @@ enum iavf_rx_flex_desc_status_error_1_bits { IAVF_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */ }; + +#define IAVF_TXD_DATA_QW1_DTYPE_SHIFT (0) +#define IAVF_TXD_DATA_QW1_DTYPE_MASK (0xFUL << IAVF_TXD_QW1_DTYPE_SHIFT) + +#define IAVF_TXD_DATA_QW1_CMD_SHIFT (4) +#define IAVF_TXD_DATA_QW1_CMD_MASK (0x3FFUL << IAVF_TXD_DATA_QW1_CMD_SHIFT) + +#define IAVF_TXD_DATA_QW1_OFFSET_SHIFT (16) +#define IAVF_TXD_DATA_QW1_OFFSET_MASK (0x3FFFFULL << \ + IAVF_TXD_DATA_QW1_OFFSET_SHIFT) + +#define IAVF_TXD_DATA_QW1_OFFSET_MACLEN_SHIFT (IAVF_TXD_DATA_QW1_OFFSET_SHIFT) +#define IAVF_TXD_DATA_QW1_OFFSET_MACLEN_MASK \ + (0x7FUL << IAVF_TXD_DATA_QW1_OFFSET_MACLEN_SHIFT) + +#define IAVF_TXD_DATA_QW1_OFFSET_IPLEN_SHIFT \ + (IAVF_TXD_DATA_QW1_OFFSET_SHIFT + IAVF_TX_DESC_LENGTH_IPLEN_SHIFT) +#define IAVF_TXD_DATA_QW1_OFFSET_IPLEN_MASK \ + (0x7FUL << IAVF_TXD_DATA_QW1_OFFSET_IPLEN_SHIFT) + +#define IAVF_TXD_DATA_QW1_OFFSET_L4LEN_SHIFT \ + (IAVF_TXD_DATA_QW1_OFFSET_SHIFT + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) +#define IAVF_TXD_DATA_QW1_OFFSET_L4LEN_MASK \ + (0xFUL << IAVF_TXD_DATA_QW1_OFFSET_L4LEN_SHIFT) + +#define IAVF_TXD_DATA_QW1_MACLEN_MASK \ + (0x7FUL << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT) +#define IAVF_TXD_DATA_QW1_IPLEN_MASK \ + (0x7FUL << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT) +#define IAVF_TXD_DATA_QW1_L4LEN_MASK \ + (0xFUL << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) +#define IAVF_TXD_DATA_QW1_FCLEN_MASK \ + (0xFUL << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) + +#define IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT (34) +#define IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK \ + (0x3FFFULL << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) + +#define IAVF_TXD_DATA_QW1_L2TAG1_SHIFT (48) +#define IAVF_TXD_DATA_QW1_L2TAG1_MASK \ + (0xFFFFULL << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT) + +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT (11) 
+#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_MASK \ + (0x7UL << IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT) + +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_SHIFT (14) +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_MASK \ + (0xFUL << IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_SHIFT) + +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT (30) +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_MASK \ + (0x3FFFFUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT) + +#define IAVF_TXD_CTX_QW1_TSYNC_PARAMS_TLEN_SHIFT (30) +#define IAVF_TXD_CTX_QW1_TSYNC_PARAMS_TLEN_MASK \ + (0x3FUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT) + +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_SHIFT (50) +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_MASK \ + (0x3FFFUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_SHIFT) + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT (0) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_MASK (0x3UL) + +enum iavf_tx_ctx_desc_tunnel_external_ip_type { + IAVF_TX_CTX_DESC_EIPT_NONE, + IAVF_TX_CTX_DESC_EIPT_IPV6, + IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD, + IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD +}; + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT (2) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_MASK (0x7FUL) + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNT_SHIFT (9) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNT_MASK (0x3UL) + +enum iavf_tx_ctx_desc_tunnel_l4_tunnel_type { + IAVF_TX_CTX_DESC_L4_TUN_TYP_NO_UDP_GRE, + IAVF_TX_CTX_DESC_L4_TUN_TYP_UDP, + IAVF_TX_CTX_DESC_L4_TUN_TYP_GRE +}; + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT (11) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_MASK (0x1UL) + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNLEN_SHIFT (12) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNLEN_MASK (0x7FUL) + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_DECTTL_SHIFT (19) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_DECTTL_MASK (0xFUL) + +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4T_CS_SHIFT (23) +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4T_CS_MASK (0x1UL) + +#define IAVF_TXD_CTX_QW0_L2TAG2_PARAM (32) +#define 
IAVF_TXD_CTX_QW0_L2TAG2_MASK (0xFFFFUL) + + +#define IAVF_RX_FLEX_DESC_IPSEC_CRYPTO_SAID_MASK (0xFFFFF) + +/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */ +#define IAVF_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */ + + /* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */ #define IAVF_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */ @@ -553,9 +659,10 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq, const volatile struct iavf_tx_desc *tx_desc = desc; enum iavf_tx_desc_dtype_value type; - type = (enum iavf_tx_desc_dtype_value)rte_le_to_cpu_64( - tx_desc->cmd_type_offset_bsz & - rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)); + + type = (enum iavf_tx_desc_dtype_value) + rte_le_to_cpu_64(tx_desc->cmd_type_offset_bsz & + rte_cpu_to_le_64(IAVF_TXD_DATA_QW1_DTYPE_MASK)); switch (type) { case IAVF_TX_DESC_DTYPE_DATA: name = "Tx_data_desc"; @@ -569,8 +676,8 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq, } printf("Queue %d %s %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n", - txq->queue_id, name, tx_id, tx_desc->buffer_addr, - tx_desc->cmd_type_offset_bsz); + txq->queue_id, name, tx_id, tx_desc->buffer_addr, + tx_desc->cmd_type_offset_bsz); } #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \ diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c index d4f4d705b7..6d42ae9373 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c @@ -363,10 +363,12 @@ static inline void flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts, const uint32_t *type_table) { - const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M, - 0, IAVF_RX_FLEX_DESC_PTYPE_M, - 0, IAVF_RX_FLEX_DESC_PTYPE_M, - 0, IAVF_RX_FLEX_DESC_PTYPE_M); + const __m128i ptype_mask = _mm_set_epi16( + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0, + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0, + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0, + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0); + __m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]); 
__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]); __m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23); -- 2.25.1