From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from dpdk.org (dpdk.org [92.243.14.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 928C0A09E0
	for ; Fri, 13 Nov 2020 14:37:08 +0100 (CET)
Received: from [92.243.14.124] (localhost [127.0.0.1])
	by dpdk.org (Postfix) with ESMTP id 532E64C96;
	Fri, 13 Nov 2020 14:37:03 +0100 (CET)
Received: from szxga07-in.huawei.com (szxga07-in.huawei.com [45.249.212.35])
	by dpdk.org (Postfix) with ESMTP id B8175C8A8
	for ; Fri, 13 Nov 2020 14:36:59 +0100 (CET)
Received: from DGGEMS404-HUB.china.huawei.com (unknown [172.30.72.59])
	by szxga07-in.huawei.com (SkyGuard) with ESMTP id 4CXfdQ1Cy2z76KH
	for ; Fri, 13 Nov 2020 21:36:42 +0800 (CST)
Received: from localhost.localdomain (10.69.192.56) by
	DGGEMS404-HUB.china.huawei.com (10.3.19.204) with Microsoft SMTP Server id
	14.3.487.0; Fri, 13 Nov 2020 21:36:43 +0800
From: Lijun Ou
To: ,
CC:
Date: Fri, 13 Nov 2020 21:36:58 +0800
Message-ID: <1605274630-23414-2-git-send-email-oulijun@huawei.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1605274630-23414-1-git-send-email-oulijun@huawei.com>
References: <1605274630-23414-1-git-send-email-oulijun@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain
X-Originating-IP: [10.69.192.56]
X-CFilter-Loop: Reflected
Subject: [dpdk-stable] [PATCH 19.11.6 01/13] net/hns3: support TSO
X-BeenThere: stable@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches for DPDK stable branches
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Errors-To: stable-bounces@dpdk.org
Sender: "stable"

[ upstream commit 6dca716c9e1daa8ea770a4a198bd068e72a2e03c ]

This patch adds TCP segment offload support for hns3 PMD driver.

Signed-off-by: Hongbo Zheng
Signed-off-by: Wei Hu (Xavier)
---
 doc/guides/nics/features/hns3.ini    |   1 +
 doc/guides/nics/features/hns3_vf.ini |   1 +
 doc/guides/nics/hns3.rst             |   1 +
 drivers/net/hns3/hns3_ethdev.c       |   4 +
 drivers/net/hns3/hns3_ethdev.h       |   6 +-
 drivers/net/hns3/hns3_ethdev_vf.c    |   4 +
 drivers/net/hns3/hns3_rxtx.c         | 258 +++++++++++++++++++++++++++++++++--
 7 files changed, 259 insertions(+), 16 deletions(-)

diff --git a/doc/guides/nics/features/hns3.ini b/doc/guides/nics/features/hns3.ini
index cd5c08a..c3a8544 100644
--- a/doc/guides/nics/features/hns3.ini
+++ b/doc/guides/nics/features/hns3.ini
@@ -8,6 +8,7 @@ Link status = Y
 Rx interrupt = Y
 MTU update = Y
 Jumbo frame = Y
+TSO = Y
 Promiscuous mode = Y
 Allmulticast mode = Y
 Unicast MAC filter = Y
diff --git a/doc/guides/nics/features/hns3_vf.ini b/doc/guides/nics/features/hns3_vf.ini
index fd00ac3..e4e7738 100644
--- a/doc/guides/nics/features/hns3_vf.ini
+++ b/doc/guides/nics/features/hns3_vf.ini
@@ -8,6 +8,7 @@ Link status = Y
 Rx interrupt = Y
 MTU update = Y
 Jumbo frame = Y
+TSO = Y
 Unicast MAC filter = Y
 Multicast MAC filter = Y
 RSS hash = Y
diff --git a/doc/guides/nics/hns3.rst b/doc/guides/nics/hns3.rst
index 8d19f48..05dbe41 100644
--- a/doc/guides/nics/hns3.rst
+++ b/doc/guides/nics/hns3.rst
@@ -17,6 +17,7 @@ Features of the HNS3 PMD are:
 - Receive Side Scaling (RSS)
 - Packet type information
 - Checksum offload
+- TSO offload
 - Promiscuous mode
 - Multicast mode
 - Port hardware statistics
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index af43a44..23c16b8 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -2468,6 +2468,10 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
 				 DEV_TX_OFFLOAD_VLAN_INSERT |
 				 DEV_TX_OFFLOAD_QINQ_INSERT |
 				 DEV_TX_OFFLOAD_MULTI_SEGS |
+				 DEV_TX_OFFLOAD_TCP_TSO |
+				 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+				 DEV_TX_OFFLOAD_GRE_TNL_TSO |
+				 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
 				 DEV_TX_OFFLOAD_MBUF_FAST_FREE);
 
 	info->rx_desc_lim = (struct rte_eth_desc_lim) {
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 6e9173a..f63d01d 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -31,11 +31,15 @@ #define HNS3_MC_MACADDR_NUM 128
 
 #define HNS3_MAX_BD_SIZE 65535
-#define HNS3_MAX_TX_BD_PER_PKT 8
+#define HNS3_MAX_NON_TSO_BD_PER_PKT 8
+#define HNS3_MAX_TSO_BD_PER_PKT 63
 #define HNS3_MAX_FRAME_LEN 9728
 #define HNS3_MIN_FRAME_LEN 64
 #define HNS3_VLAN_TAG_SIZE 4
 #define HNS3_DEFAULT_RX_BUF_LEN 2048
+#define HNS3_MAX_BD_PAYLEN (1024 * 1024 - 1)
+#define HNS3_MAX_TSO_HDR_SIZE 512
+#define HNS3_MAX_TSO_HDR_BD_NUM 3
 
 #define HNS3_ETH_OVERHEAD \
 	(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + HNS3_VLAN_TAG_SIZE * 2)
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index 5d1da44..736d8a7 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -850,6 +850,10 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
 				 DEV_TX_OFFLOAD_VLAN_INSERT |
 				 DEV_TX_OFFLOAD_QINQ_INSERT |
 				 DEV_TX_OFFLOAD_MULTI_SEGS |
+				 DEV_TX_OFFLOAD_TCP_TSO |
+				 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+				 DEV_TX_OFFLOAD_GRE_TNL_TSO |
+				 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
 				 DEV_TX_OFFLOAD_MBUF_FAST_FREE);
 
 	info->rx_desc_lim = (struct rte_eth_desc_lim) {
diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index c1ffa13..28a9334 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -1858,6 +1858,78 @@ hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 	txq->tx_bd_ready = tx_bd_ready;
 }
 
+static int
+hns3_tso_proc_tunnel(struct hns3_desc *desc, uint64_t ol_flags,
+		     struct rte_mbuf *rxm, uint8_t *l2_len)
+{
+	uint64_t tun_flags;
+	uint8_t ol4_len;
+	uint32_t otmp;
+
+	tun_flags = ol_flags & PKT_TX_TUNNEL_MASK;
+	if (tun_flags == 0)
+		return 0;
+
+	otmp = rte_le_to_cpu_32(desc->tx.ol_type_vlan_len_msec);
+	switch (tun_flags) {
+	case PKT_TX_TUNNEL_GENEVE:
+	case PKT_TX_TUNNEL_VXLAN:
+		*l2_len = rxm->l2_len - RTE_ETHER_VXLAN_HLEN;
+		break;
+	case PKT_TX_TUNNEL_GRE:
+		/*
+		 * OL4 header size, defined in 4 Bytes, it contains outer
+		 * L4(GRE) length and tunneling length.
+		 */
+		ol4_len = hns3_get_field(otmp, HNS3_TXD_L4LEN_M,
+					 HNS3_TXD_L4LEN_S);
+		*l2_len = rxm->l2_len - (ol4_len << HNS3_L4_LEN_UNIT);
+		break;
+	default:
+		/* For non UDP / GRE tunneling, drop the tunnel packet */
+		return -EINVAL;
+	}
+	hns3_set_field(otmp, HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
+		       rxm->outer_l2_len >> HNS3_L2_LEN_UNIT);
+	desc->tx.ol_type_vlan_len_msec = rte_cpu_to_le_32(otmp);
+
+	return 0;
+}
+
+static void
+hns3_set_tso(struct hns3_desc *desc,
+	     uint64_t ol_flags, struct rte_mbuf *rxm)
+{
+	uint32_t paylen, hdr_len;
+	uint32_t tmp;
+	uint8_t l2_len = rxm->l2_len;
+
+	if (!(ol_flags & PKT_TX_TCP_SEG))
+		return;
+
+	if (hns3_tso_proc_tunnel(desc, ol_flags, rxm, &l2_len))
+		return;
+
+	hdr_len = rxm->l2_len + rxm->l3_len + rxm->l4_len;
+	hdr_len += (ol_flags & PKT_TX_TUNNEL_MASK) ?
+		   rxm->outer_l2_len + rxm->outer_l3_len : 0;
+	paylen = rxm->pkt_len - hdr_len;
+	if (paylen <= rxm->tso_segsz)
+		return;
+
+	tmp = rte_le_to_cpu_32(desc->tx.type_cs_vlan_tso_len);
+	hns3_set_bit(tmp, HNS3_TXD_TSO_B, 1);
+	hns3_set_bit(tmp, HNS3_TXD_L3CS_B, 1);
+	hns3_set_field(tmp, HNS3_TXD_L4T_M, HNS3_TXD_L4T_S, HNS3_L4T_TCP);
+	hns3_set_bit(tmp, HNS3_TXD_L4CS_B, 1);
+	hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
+		       sizeof(struct rte_tcp_hdr) >> HNS3_L4_LEN_UNIT);
+	hns3_set_field(tmp, HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
+		       l2_len >> HNS3_L2_LEN_UNIT);
+	desc->tx.type_cs_vlan_tso_len = rte_cpu_to_le_32(tmp);
+	desc->tx.mss = rte_cpu_to_le_16(rxm->tso_segsz);
+}
+
 static void
 fill_desc(struct hns3_tx_queue *txq, uint16_t tx_desc_id, struct rte_mbuf *rxm,
 	  bool first, int offset)
@@ -1865,9 +1937,9 @@ fill_desc(struct hns3_tx_queue *txq, uint16_t tx_desc_id, struct rte_mbuf *rxm,
 	struct hns3_desc *tx_ring = txq->tx_ring;
 	struct hns3_desc *desc = &tx_ring[tx_desc_id];
 	uint8_t frag_end = rxm->next == NULL ? 1 : 0;
+	uint64_t ol_flags = rxm->ol_flags;
 	uint16_t size = rxm->data_len;
 	uint16_t rrcfv = 0;
-	uint64_t ol_flags = rxm->ol_flags;
 	uint32_t hdr_len;
 	uint32_t paylen;
 	uint32_t tmp;
@@ -1882,6 +1954,7 @@ fill_desc(struct hns3_tx_queue *txq, uint16_t tx_desc_id, struct rte_mbuf *rxm,
 			   rxm->outer_l2_len + rxm->outer_l3_len : 0;
 		paylen = rxm->pkt_len - hdr_len;
 		desc->tx.paylen = rte_cpu_to_le_32(paylen);
+		hns3_set_tso(desc, ol_flags, rxm);
 	}
 
 	hns3_set_bit(rrcfv, HNS3_TXD_FE_B, frag_end);
@@ -2195,6 +2268,136 @@ hns3_txd_enable_checksum(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
 	desc->tx.type_cs_vlan_tso_len |= rte_cpu_to_le_32(value);
 }
 
+static bool
+hns3_pkt_need_linearized(struct rte_mbuf *tx_pkts, uint32_t bd_num)
+{
+	struct rte_mbuf *m_first = tx_pkts;
+	struct rte_mbuf *m_last = tx_pkts;
+	uint32_t tot_len = 0;
+	uint32_t hdr_len;
+	uint32_t i;
+
+	/*
+	 * Hardware requires that the sum of the data length of every 8
+	 * consecutive buffers is greater than MSS in hns3 network engine.
+	 * We simplify it by ensuring pkt_headlen + the first 8 consecutive
+	 * frags greater than gso header len + mss, and the remaining 7
+	 * consecutive frags greater than MSS except the last 7 frags.
+	 */
+	if (bd_num <= HNS3_MAX_NON_TSO_BD_PER_PKT)
+		return false;
+
+	for (i = 0; m_last && i < HNS3_MAX_NON_TSO_BD_PER_PKT - 1;
+	     i++, m_last = m_last->next)
+		tot_len += m_last->data_len;
+
+	if (!m_last)
+		return true;
+
+	/* ensure the first 8 frags is greater than mss + header */
+	hdr_len = tx_pkts->l2_len + tx_pkts->l3_len + tx_pkts->l4_len;
+	hdr_len += (tx_pkts->ol_flags & PKT_TX_TUNNEL_MASK) ?
+		   tx_pkts->outer_l2_len + tx_pkts->outer_l3_len : 0;
+	if (tot_len + m_last->data_len < tx_pkts->tso_segsz + hdr_len)
+		return true;
+
+	/*
+	 * ensure the sum of the data length of every 7 consecutive buffer
+	 * is greater than mss except the last one.
+	 */
+	for (i = 0; m_last && i < bd_num - HNS3_MAX_NON_TSO_BD_PER_PKT; i++) {
+		tot_len -= m_first->data_len;
+		tot_len += m_last->data_len;
+
+		if (tot_len < tx_pkts->tso_segsz)
+			return true;
+
+		m_first = m_first->next;
+		m_last = m_last->next;
+	}
+
+	return false;
+}
+
+static void
+hns3_outer_header_cksum_prepare(struct rte_mbuf *m)
+{
+	uint64_t ol_flags = m->ol_flags;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_udp_hdr *udp_hdr;
+	uint32_t paylen, hdr_len;
+
+	if (!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6)))
+		return;
+
+	if (ol_flags & PKT_TX_IPV4) {
+		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+						   m->outer_l2_len);
+
+		if (ol_flags & PKT_TX_IP_CKSUM)
+			ipv4_hdr->hdr_checksum = 0;
+	}
+
+	if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM &&
+	    ol_flags & PKT_TX_TCP_SEG) {
+		hdr_len = m->l2_len + m->l3_len + m->l4_len;
+		hdr_len += (ol_flags & PKT_TX_TUNNEL_MASK) ?
+			   m->outer_l2_len + m->outer_l3_len : 0;
+		paylen = m->pkt_len - hdr_len;
+		if (paylen <= m->tso_segsz)
+			return;
+		udp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+						  m->outer_l2_len +
+						  m->outer_l3_len);
+		udp_hdr->dgram_cksum = 0;
+	}
+}
+
+static inline bool
+hns3_pkt_is_tso(struct rte_mbuf *m)
+{
+	return (m->tso_segsz != 0 && m->ol_flags & PKT_TX_TCP_SEG);
+}
+
+static int
+hns3_check_tso_pkt_valid(struct rte_mbuf *m)
+{
+	uint32_t tmp_data_len_sum = 0;
+	uint16_t nb_buf = m->nb_segs;
+	uint32_t paylen, hdr_len;
+	struct rte_mbuf *m_seg;
+	int i;
+
+	if (nb_buf > HNS3_MAX_TSO_BD_PER_PKT)
+		return -EINVAL;
+
+	hdr_len = m->l2_len + m->l3_len + m->l4_len;
+	hdr_len += (m->ol_flags & PKT_TX_TUNNEL_MASK) ?
+		   m->outer_l2_len + m->outer_l3_len : 0;
+	if (hdr_len > HNS3_MAX_TSO_HDR_SIZE)
+		return -EINVAL;
+
+	paylen = m->pkt_len - hdr_len;
+	if (paylen > HNS3_MAX_BD_PAYLEN)
+		return -EINVAL;
+
+	/*
+	 * The TSO header (include outer and inner L2, L3 and L4 header)
+	 * should be provided by three descriptors in maximum in hns3 network
+	 * engine.
+	 */
+	m_seg = m;
+	for (i = 0; m_seg != NULL && i < HNS3_MAX_TSO_HDR_BD_NUM && i < nb_buf;
+	     i++, m_seg = m_seg->next) {
+		tmp_data_len_sum += m_seg->data_len;
+	}
+
+	if (hdr_len > tmp_data_len_sum)
+		return -EINVAL;
+
+	return 0;
+}
+
 uint16_t
 hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 	       uint16_t nb_pkts)
@@ -2206,6 +2409,13 @@ hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 	for (i = 0; i < nb_pkts; i++) {
 		m = tx_pkts[i];
 
+		if (hns3_pkt_is_tso(m) &&
+		    (hns3_pkt_need_linearized(m, m->nb_segs) |
+		     hns3_check_tso_pkt_valid(m))) {
+			rte_errno = EINVAL;
+			return i;
+		}
+
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
 		ret = rte_validate_tx_offload(m);
 		if (ret != 0) {
@@ -2218,6 +2428,8 @@ hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 			rte_errno = -ret;
 			return i;
 		}
+
+		hns3_outer_header_cksum_prepare(m);
 	}
 
 	return i;
@@ -2241,13 +2453,39 @@ hns3_parse_cksum(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
 	return 0;
 }
 
+static int
+hns3_check_non_tso_pkt(uint16_t nb_buf, struct rte_mbuf **m_seg,
+		       struct rte_mbuf *tx_pkt, struct hns3_tx_queue *txq)
+{
+	struct rte_mbuf *new_pkt;
+	int ret;
+
+	if (hns3_pkt_is_tso(*m_seg))
+		return 0;
+
+	/*
+	 * If packet length is greater than HNS3_MAX_FRAME_LEN
+	 * driver support, the packet will be ignored.
+	 */
+	if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) > HNS3_MAX_FRAME_LEN))
+		return -EINVAL;
+
+	if (unlikely(nb_buf > HNS3_MAX_NON_TSO_BD_PER_PKT)) {
+		ret = hns3_reassemble_tx_pkts(txq, tx_pkt, &new_pkt);
+		if (ret)
+			return ret;
+		*m_seg = new_pkt;
+	}
+
+	return 0;
+}
+
 uint16_t
 hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct rte_net_hdr_lens hdr_lens = {0};
 	struct hns3_tx_queue *txq = tx_queue;
 	struct hns3_entry *tx_bak_pkt;
-	struct rte_mbuf *new_pkt;
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint32_t nb_hold = 0;
@@ -2280,13 +2518,6 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		}
 
 		/*
-		 * If packet length is greater than HNS3_MAX_FRAME_LEN
-		 * driver support, the packet will be ignored.
-		 */
-		if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) > HNS3_MAX_FRAME_LEN))
-			break;
-
-		/*
 		 * If packet length is less than minimum packet size, driver
 		 * need to pad it.
 		 */
@@ -2304,12 +2535,9 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		}
 
 		m_seg = tx_pkt;
-		if (unlikely(nb_buf > HNS3_MAX_TX_BD_PER_PKT)) {
-			if (hns3_reassemble_tx_pkts(txq, tx_pkt, &new_pkt))
-				goto end_of_tx;
-			m_seg = new_pkt;
-			nb_buf = m_seg->nb_segs;
-		}
+
+		if (hns3_check_non_tso_pkt(nb_buf, &m_seg, tx_pkt, txq))
+			goto end_of_tx;
 
 		if (hns3_parse_cksum(txq, tx_next_use, m_seg, &hdr_lens))
 			goto end_of_tx;
-- 
2.7.4
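
Usage note (an illustrative sketch, not part of the upstream patch): the code below shows how an application on DPDK 19.11 would typically consume the TSO capability this patch advertises: check DEV_TX_OFFLOAD_TCP_TSO in the reported capabilities, request it in the port's txmode offloads, mark each outgoing mbuf with PKT_TX_TCP_SEG plus its header lengths and MSS, and run it through rte_eth_tx_prepare() (which reaches hns3_prep_pkts() above) before rte_eth_tx_burst(). The helper names, the port/queue/mbuf variables and the 1460-byte MSS are assumptions made for the example; port and queue setup are omitted.

#include <errno.h>
#include <stdint.h>

#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

/* Enable TCP TSO in the port configuration if the PMD reports it. */
static int
request_tcp_tso(uint16_t port_id, struct rte_eth_conf *port_conf)
{
	struct rte_eth_dev_info dev_info;

	if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
		return -ENODEV;
	if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO))
		return -ENOTSUP;

	port_conf->txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	return 0;
}

/* Mark a fully built IPv4/TCP mbuf for segmentation and transmit it. */
static uint16_t
xmit_tso_pkt(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *m)
{
	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->l4_len = sizeof(struct rte_tcp_hdr);
	m->tso_segsz = 1460;	/* illustrative MSS, carried into desc->tx.mss */

	/*
	 * tx_prepare reaches hns3_prep_pkts(), where the BD count, TSO
	 * header size and payload length checks added by this patch run;
	 * a rejected packet leaves rte_errno set.
	 */
	if (rte_eth_tx_prepare(port_id, queue_id, &m, 1) != 1)
		return 0;

	return rte_eth_tx_burst(port_id, queue_id, &m, 1);
}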