From mboxrd@z Thu Jan 1 00:00:00 1970
From: Yanling Song <songyl@ramaxel.com>
To: dev@dpdk.org
CC: , , ,
Subject: [PATCH v1 15/25] net/spnic: support IO packets handling
Date: Sat, 18 Dec 2021 10:51:42 +0800
Message-ID: <43c18694d8e072ced3f9dfdc3917f04c0547da80.1639636621.git.songyl@ramaxel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: 
References: 
MIME-Version: 1.0
Content-Type: text/plain
List-Id: DPDK patches and discussions

This patch implements rx_pkt_burst() and tx_pkt_burst() to handle IO
packets.

For Tx packets, this commit parses the ol_flags of the mbuf and fills
the offload information into the wqe so that hardware can process the
packets correctly. Furthermore, this commit allocates a copy mempool to
handle packets whose mbuf segments exceed the number a single wqe can
carry.

For Rx packets, this commit fills the ol_flags of the mbuf and rearms
new mbufs and rq wqes.

Signed-off-by: Yanling Song <songyl@ramaxel.com>
---
 drivers/net/spnic/spnic_ethdev.c |  48 +++
 drivers/net/spnic/spnic_ethdev.h |   7 +
 drivers/net/spnic/spnic_rx.c     | 209 ++++++++++++
 drivers/net/spnic/spnic_rx.h     | 137 ++++++++
 drivers/net/spnic/spnic_tx.c     | 524 +++++++++++++++++++++++++++++++
 drivers/net/spnic/spnic_tx.h     |   7 +
 6 files changed, 932 insertions(+)

diff --git a/drivers/net/spnic/spnic_ethdev.c b/drivers/net/spnic/spnic_ethdev.c
index 826a34f7fc..b4d20e1a6f 100644
--- a/drivers/net/spnic/spnic_ethdev.c
+++ b/drivers/net/spnic/spnic_ethdev.c
@@ -970,6 +970,32 @@ static int spnic_dev_start(struct rte_eth_dev *eth_dev)
 	return err;
 }
 
+static int spnic_copy_mempool_init(struct spnic_nic_dev *nic_dev)
+{
+	nic_dev->cpy_mpool = rte_mempool_lookup(nic_dev->dev_name);
+	if (nic_dev->cpy_mpool == NULL) {
+		nic_dev->cpy_mpool =
+			rte_pktmbuf_pool_create(nic_dev->dev_name,
+					SPNIC_COPY_MEMPOOL_DEPTH, 0, 0,
+					SPNIC_COPY_MBUF_SIZE, rte_socket_id());
+		if (nic_dev->cpy_mpool == NULL) {
+			PMD_DRV_LOG(ERR, "Create copy mempool failed, errno: %d, dev_name: %s",
+				    rte_errno, nic_dev->dev_name);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void spnic_copy_mempool_uninit(struct spnic_nic_dev *nic_dev)
+{
+	if (nic_dev->cpy_mpool != NULL) {
+		rte_mempool_free(nic_dev->cpy_mpool);
+		nic_dev->cpy_mpool = NULL;
+	}
+}
+
 /**
  * Stop the device.
* @@ -986,6 +1012,9 @@ static int spnic_dev_stop(struct rte_eth_dev *dev) int err; nic_dev = SPNIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev); + if (!nic_dev || !spnic_support_nic(nic_dev->hwdev)) + return 0; + if (!rte_bit_relaxed_test_and_clear32(SPNIC_DEV_START, &nic_dev->dev_status)) { PMD_DRV_LOG(INFO, "Device %s already stopped", nic_dev->dev_name); @@ -1014,6 +1043,11 @@ static int spnic_dev_stop(struct rte_eth_dev *dev) spnic_flush_qps_res(nic_dev->hwdev); + /* + * After set vport disable 100ms, no packets will be send to host + */ + rte_delay_ms(100); + /* Clean RSS table and rx_mode */ spnic_remove_rxtx_configure(dev); @@ -1054,6 +1088,7 @@ static int spnic_dev_close(struct rte_eth_dev *eth_dev) for (qid = 0; qid < nic_dev->num_rqs; qid++) spnic_rx_queue_release(eth_dev, qid); + spnic_copy_mempool_uninit(nic_dev); spnic_deinit_sw_rxtxqs(nic_dev); spnic_deinit_mac_addr(eth_dev); rte_free(nic_dev->mc_list); @@ -1067,6 +1102,8 @@ static int spnic_dev_close(struct rte_eth_dev *eth_dev) spnic_free_nic_hwdev(nic_dev->hwdev); spnic_free_hwdev(nic_dev->hwdev); + eth_dev->rx_pkt_burst = NULL; + eth_dev->tx_pkt_burst = NULL; eth_dev->dev_ops = NULL; rte_free(nic_dev->hwdev); @@ -1548,6 +1585,13 @@ static int spnic_func_init(struct rte_eth_dev *eth_dev) goto set_default_feature_fail; } + err = spnic_copy_mempool_init(nic_dev); + if (err) { + PMD_DRV_LOG(ERR, "Create copy mempool failed, dev_name: %s", + eth_dev->data->name); + goto init_mpool_fail; + } + spnic_mutex_init(&nic_dev->rx_mode_mutex, NULL); rte_bit_relaxed_set32(SPNIC_DEV_INTR_EN, &nic_dev->dev_status); @@ -1558,6 +1602,7 @@ static int spnic_func_init(struct rte_eth_dev *eth_dev) return 0; +init_mpool_fail: set_default_feature_fail: spnic_deinit_mac_addr(eth_dev); @@ -1602,6 +1647,9 @@ static int spnic_dev_init(struct rte_eth_dev *eth_dev) (rte_eal_process_type() == RTE_PROC_PRIMARY) ? 
"primary" : "secondary"); + eth_dev->rx_pkt_burst = spnic_recv_pkts; + eth_dev->tx_pkt_burst = spnic_xmit_pkts; + return spnic_func_init(eth_dev); } diff --git a/drivers/net/spnic/spnic_ethdev.h b/drivers/net/spnic/spnic_ethdev.h index 996b4e4b8f..2b59886942 100644 --- a/drivers/net/spnic/spnic_ethdev.h +++ b/drivers/net/spnic/spnic_ethdev.h @@ -4,6 +4,9 @@ #ifndef _SPNIC_ETHDEV_H_ #define _SPNIC_ETHDEV_H_ + +#define SPNIC_COPY_MEMPOOL_DEPTH 128 +#define SPNIC_COPY_MBUF_SIZE 4096 #define SPNIC_DEV_NAME_LEN 32 #define SPNIC_UINT32_BIT_SIZE (CHAR_BIT * sizeof(uint32_t)) @@ -17,6 +20,10 @@ enum spnic_dev_status { SPNIC_DEV_INTR_EN }; +enum spnic_tx_cvlan_type { + SPNIC_TX_TPID0, +}; + enum nic_feature_cap { NIC_F_CSUM = BIT(0), NIC_F_SCTP_CRC = BIT(1), diff --git a/drivers/net/spnic/spnic_rx.c b/drivers/net/spnic/spnic_rx.c index 4d8c6c7e60..5af836ed41 100644 --- a/drivers/net/spnic/spnic_rx.c +++ b/drivers/net/spnic/spnic_rx.c @@ -486,6 +486,117 @@ void spnic_remove_rq_from_rx_queue_list(struct spnic_nic_dev *nic_dev, nic_dev->num_rss = rss_queue_count; } + +static inline uint64_t spnic_rx_vlan(uint32_t offload_type, uint32_t vlan_len, + uint16_t *vlan_tci) +{ + uint16_t vlan_tag; + + vlan_tag = SPNIC_GET_RX_VLAN_TAG(vlan_len); + if (!SPNIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) || vlan_tag == 0) { + *vlan_tci = 0; + return 0; + } + + *vlan_tci = vlan_tag; + + return RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; +} + +static inline uint64_t spnic_rx_csum(uint32_t status, struct spnic_rxq *rxq) +{ + struct spnic_nic_dev *nic_dev = rxq->nic_dev; + uint32_t csum_err; + uint64_t flags; + + if (unlikely(!(nic_dev->rx_csum_en & SPNIC_DEFAULT_RX_CSUM_OFFLOAD))) + return RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; + + /* Most case checksum is ok */ + csum_err = SPNIC_GET_RX_CSUM_ERR(status); + if (likely(csum_err == 0)) + return (RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD); + + /* + * If bypass bit is set, all other err status indications should be + * ignored + */ + if (unlikely(csum_err & SPNIC_RX_CSUM_HW_CHECK_NONE)) + return RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; + + flags = 0; + + /* IP checksum error */ + if (csum_err & SPNIC_RX_CSUM_IP_CSUM_ERR) { + flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; + rxq->rxq_stats.errors++; + } + + /* L4 checksum error */ + if (csum_err & SPNIC_RX_CSUM_TCP_CSUM_ERR || + csum_err & SPNIC_RX_CSUM_UDP_CSUM_ERR || + csum_err & SPNIC_RX_CSUM_SCTP_CRC_ERR) { + flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; + rxq->rxq_stats.errors++; + } + + if (unlikely(csum_err == SPNIC_RX_CSUM_IPSU_OTHER_ERR)) + rxq->rxq_stats.other_errors++; + + return flags; +} + +static inline uint64_t spnic_rx_rss_hash(uint32_t offload_type, + uint32_t rss_hash_value, + uint32_t *rss_hash) +{ + uint32_t rss_type; + + rss_type = SPNIC_GET_RSS_TYPES(offload_type); + if (likely(rss_type != 0)) { + *rss_hash = rss_hash_value; + return RTE_MBUF_F_RX_RSS_HASH; + } + + return 0; +} + +static void spnic_recv_jumbo_pkt(struct spnic_rxq *rxq, + struct rte_mbuf *head_mbuf, + u32 remain_pkt_len) +{ + struct rte_mbuf *cur_mbuf = NULL; + struct rte_mbuf *rxm = NULL; + struct spnic_rx_info *rx_info = NULL; + u16 sw_ci, rx_buf_len = rxq->buf_len; + u32 pkt_len; + + while (remain_pkt_len > 0) { + sw_ci = spnic_get_rq_local_ci(rxq); + rx_info = &rxq->rx_info[sw_ci]; + + spnic_update_rq_local_ci(rxq, 1); + + pkt_len = remain_pkt_len > rx_buf_len ? 
+ rx_buf_len : remain_pkt_len; + remain_pkt_len -= pkt_len; + + cur_mbuf = rx_info->mbuf; + cur_mbuf->data_len = (u16)pkt_len; + cur_mbuf->next = NULL; + + head_mbuf->pkt_len += cur_mbuf->data_len; + head_mbuf->nb_segs++; + + if (!rxm) + head_mbuf->next = cur_mbuf; + else + rxm->next = cur_mbuf; + + rxm = cur_mbuf; + } +} + int spnic_start_all_rqs(struct rte_eth_dev *eth_dev) { struct spnic_nic_dev *nic_dev = NULL; @@ -521,3 +632,101 @@ int spnic_start_all_rqs(struct rte_eth_dev *eth_dev) } return err; } + +u16 spnic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts) +{ + struct spnic_rxq *rxq = rx_queue; + struct spnic_rx_info *rx_info = NULL; + volatile struct spnic_rq_cqe *rx_cqe = NULL; + struct rte_mbuf *rxm = NULL; + u16 sw_ci, wqebb_cnt = 0; + u32 status, pkt_len, vlan_len, offload_type, hash_value; + u32 lro_num; + u64 rx_bytes = 0; + u16 rx_buf_len, pkts = 0; + + rx_buf_len = rxq->buf_len; + sw_ci = spnic_get_rq_local_ci(rxq); + + while (pkts < nb_pkts) { + rx_cqe = &rxq->rx_cqe[sw_ci]; + status = rx_cqe->status; + if (!SPNIC_GET_RX_DONE(status)) + break; + + /* Make sure rx_done is read before packet length */ + rte_rmb(); + + vlan_len = rx_cqe->vlan_len; + pkt_len = SPNIC_GET_RX_PKT_LEN(vlan_len); + + rx_info = &rxq->rx_info[sw_ci]; + rxm = rx_info->mbuf; + + /* 1. Next ci point and prefetch */ + sw_ci++; + sw_ci &= rxq->q_mask; + + /* 2. Prefetch next mbuf first 64B */ + rte_prefetch0(rxq->rx_info[sw_ci].mbuf); + + /* 3. Jumbo frame process */ + if (likely(pkt_len <= rx_buf_len)) { + rxm->data_len = pkt_len; + rxm->pkt_len = pkt_len; + wqebb_cnt++; + } else { + rxm->data_len = rx_buf_len; + rxm->pkt_len = rx_buf_len; + + /* If receive jumbo, updating ci will be done by + * spnic_recv_jumbo_pkt function. + */ + spnic_update_rq_local_ci(rxq, wqebb_cnt + 1); + wqebb_cnt = 0; + spnic_recv_jumbo_pkt(rxq, rxm, pkt_len - rx_buf_len); + sw_ci = spnic_get_rq_local_ci(rxq); + } + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->port = rxq->port_id; + + /* 4. Rx checksum offload */ + rxm->ol_flags |= spnic_rx_csum(status, rxq); + + /* 5. Vlan offload */ + offload_type = rx_cqe->offload_type; + rxm->ol_flags |= spnic_rx_vlan(offload_type, vlan_len, + &rxm->vlan_tci); + /* 6. RSS */ + hash_value = rx_cqe->hash_val; + rxm->ol_flags |= spnic_rx_rss_hash(offload_type, hash_value, + &rxm->hash.rss); + /* 7. LRO */ + lro_num = SPNIC_GET_RX_NUM_LRO(status); + if (unlikely(lro_num != 0)) { + rxm->ol_flags |= RTE_MBUF_F_RX_LRO; + rxm->tso_segsz = pkt_len / lro_num; + } + + rx_cqe->status = 0; + + rx_bytes += pkt_len; + rx_pkts[pkts++] = rxm; + } + + if (pkts) { + /* 8. Update local ci */ + spnic_update_rq_local_ci(rxq, wqebb_cnt); + + /* Update packet stats */ + rxq->rxq_stats.packets += pkts; + rxq->rxq_stats.bytes += rx_bytes; + } + rxq->rxq_stats.burst_pkts = pkts; + + /* 9. 
Rearm mbuf to rxq */ + spnic_rearm_rxq_mbuf(rxq); + + return pkts; +} diff --git a/drivers/net/spnic/spnic_rx.h b/drivers/net/spnic/spnic_rx.h index 0b534f1904..5ae4b5f1ab 100644 --- a/drivers/net/spnic/spnic_rx.h +++ b/drivers/net/spnic/spnic_rx.h @@ -5,6 +5,135 @@ #ifndef _SPNIC_RX_H_ #define _SPNIC_RX_H_ +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0 +#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_SHIFT 19 +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21 +#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_SHIFT 24 + +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU +#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_MASK 0x3U +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U +#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_MASK 0xFFU + +#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \ + RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \ + RQ_CQE_OFFOLAD_TYPE_##member##_MASK) + +#define SPNIC_GET_RX_PKT_TYPE(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE) + +#define SPNIC_GET_RX_PKT_UMBCAST(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_UMBCAST) + +#define SPNIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN) + +#define SPNIC_GET_RSS_TYPES(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, RSS_TYPE) + +#define RQ_CQE_SGE_VLAN_SHIFT 0 +#define RQ_CQE_SGE_LEN_SHIFT 16 + +#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU +#define RQ_CQE_SGE_LEN_MASK 0xFFFFU + +#define RQ_CQE_SGE_GET(val, member) (((val) >> \ + RQ_CQE_SGE_##member##_SHIFT) & \ + RQ_CQE_SGE_##member##_MASK) + +#define SPNIC_GET_RX_VLAN_TAG(vlan_len) RQ_CQE_SGE_GET(vlan_len, VLAN) + +#define SPNIC_GET_RX_PKT_LEN(vlan_len) RQ_CQE_SGE_GET(vlan_len, LEN) + +#define RQ_CQE_STATUS_CSUM_ERR_SHIFT 0 +#define RQ_CQE_STATUS_NUM_LRO_SHIFT 16 +#define RQ_CQE_STATUS_LRO_PUSH_SHIFT 25 +#define RQ_CQE_STATUS_LRO_ENTER_SHIFT 26 +#define RQ_CQE_STATUS_LRO_INTR_SHIFT 27 + +#define RQ_CQE_STATUS_BP_EN_SHIFT 30 +#define RQ_CQE_STATUS_RXDONE_SHIFT 31 +#define RQ_CQE_STATUS_DECRY_PKT_SHIFT 29 +#define RQ_CQE_STATUS_FLUSH_SHIFT 28 + +#define RQ_CQE_STATUS_CSUM_ERR_MASK 0xFFFFU +#define RQ_CQE_STATUS_NUM_LRO_MASK 0xFFU +#define RQ_CQE_STATUS_LRO_PUSH_MASK 0X1U +#define RQ_CQE_STATUS_LRO_ENTER_MASK 0X1U +#define RQ_CQE_STATUS_LRO_INTR_MASK 0X1U +#define RQ_CQE_STATUS_BP_EN_MASK 0X1U +#define RQ_CQE_STATUS_RXDONE_MASK 0x1U +#define RQ_CQE_STATUS_FLUSH_MASK 0x1U +#define RQ_CQE_STATUS_DECRY_PKT_MASK 0x1U + +#define RQ_CQE_STATUS_GET(val, member) (((val) >> \ + RQ_CQE_STATUS_##member##_SHIFT) & \ + RQ_CQE_STATUS_##member##_MASK) + +#define SPNIC_GET_RX_CSUM_ERR(status) RQ_CQE_STATUS_GET(status, CSUM_ERR) + +#define SPNIC_GET_RX_DONE(status) RQ_CQE_STATUS_GET(status, RXDONE) + +#define SPNIC_GET_RX_FLUSH(status) RQ_CQE_STATUS_GET(status, FLUSH) + +#define SPNIC_GET_RX_BP_EN(status) RQ_CQE_STATUS_GET(status, BP_EN) + +#define SPNIC_GET_RX_NUM_LRO(status) RQ_CQE_STATUS_GET(status, NUM_LRO) + +#define SPNIC_RX_IS_DECRY_PKT(status) RQ_CQE_STATUS_GET(status, DECRY_PKT) + +#define RQ_CQE_SUPER_CQE_EN_SHIFT 0 +#define RQ_CQE_PKT_NUM_SHIFT 1 +#define RQ_CQE_PKT_LAST_LEN_SHIFT 6 +#define RQ_CQE_PKT_FIRST_LEN_SHIFT 19 + +#define RQ_CQE_SUPER_CQE_EN_MASK 0x1 +#define RQ_CQE_PKT_NUM_MASK 0x1FU +#define RQ_CQE_PKT_FIRST_LEN_MASK 0x1FFFU +#define RQ_CQE_PKT_LAST_LEN_MASK 0x1FFFU + +#define RQ_CQE_PKT_NUM_GET(val, member) (((val) >> \ + RQ_CQE_PKT_##member##_SHIFT) & \ + RQ_CQE_PKT_##member##_MASK) +#define SPNIC_GET_RQ_CQE_PKT_NUM(pkt_info) RQ_CQE_PKT_NUM_GET(pkt_info, NUM) + +#define RQ_CQE_SUPER_CQE_EN_GET(val, member) (((val) >> \ + 
RQ_CQE_##member##_SHIFT) & \ + RQ_CQE_##member##_MASK) +#define SPNIC_GET_SUPER_CQE_EN(pkt_info) \ + RQ_CQE_SUPER_CQE_EN_GET(pkt_info, SUPER_CQE_EN) + +#define RQ_CQE_PKT_LEN_GET(val, member) (((val) >> \ + RQ_CQE_PKT_##member##_SHIFT) & \ + RQ_CQE_PKT_##member##_MASK) + +#define RQ_CQE_DECRY_INFO_DECRY_STATUS_SHIFT 8 +#define RQ_CQE_DECRY_INFO_ESP_NEXT_HEAD_SHIFT 0 + +#define RQ_CQE_DECRY_INFO_DECRY_STATUS_MASK 0xFFU +#define RQ_CQE_DECRY_INFO_ESP_NEXT_HEAD_MASK 0xFFU + +#define RQ_CQE_DECRY_INFO_GET(val, member) (((val) >> \ + RQ_CQE_DECRY_INFO_##member##_SHIFT) & \ + RQ_CQE_DECRY_INFO_##member##_MASK) + +#define SPNIC_GET_DECRYPT_STATUS(decry_info) \ + RQ_CQE_DECRY_INFO_GET(decry_info, DECRY_STATUS) + +#define SPNIC_GET_ESP_NEXT_HEAD(decry_info) \ + RQ_CQE_DECRY_INFO_GET(decry_info, ESP_NEXT_HEAD) + +/* Rx cqe checksum err */ +#define SPNIC_RX_CSUM_IP_CSUM_ERR BIT(0) +#define SPNIC_RX_CSUM_TCP_CSUM_ERR BIT(1) +#define SPNIC_RX_CSUM_UDP_CSUM_ERR BIT(2) +#define SPNIC_RX_CSUM_IGMP_CSUM_ERR BIT(3) +#define SPNIC_RX_CSUM_ICMPv4_CSUM_ERR BIT(4) +#define SPNIC_RX_CSUM_ICMPv6_CSUM_ERR BIT(5) +#define SPNIC_RX_CSUM_SCTP_CRC_ERR BIT(6) +#define SPNIC_RX_CSUM_HW_CHECK_NONE BIT(7) +#define SPNIC_RX_CSUM_IPSU_OTHER_ERR BIT(8) + #define SPNIC_DEFAULT_RX_CSUM_OFFLOAD 0xFFF #define SPNIC_RSS_OFFLOAD_ALL ( \ @@ -138,8 +267,16 @@ void spnic_free_all_rxq_mbufs(struct spnic_nic_dev *nic_dev); int spnic_update_rss_config(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); +int spnic_poll_rq_empty(struct spnic_rxq *rxq); + +void spnic_dump_cqe_status(struct spnic_rxq *rxq, u32 *cqe_done_cnt, + u32 *cqe_hole_cnt, u32 *head_ci, + u32 *head_done); + int spnic_start_all_rqs(struct rte_eth_dev *eth_dev); +u16 spnic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts); + void spnic_add_rq_to_rx_queue_list(struct spnic_nic_dev *nic_dev, u16 queue_id); diff --git a/drivers/net/spnic/spnic_tx.c b/drivers/net/spnic/spnic_tx.c index d905879412..0772d4929f 100644 --- a/drivers/net/spnic/spnic_tx.c +++ b/drivers/net/spnic/spnic_tx.c @@ -30,6 +30,18 @@ #define SPNIC_TX_OUTER_CHECKSUM_FLAG_SET 1 #define SPNIC_TX_OUTER_CHECKSUM_FLAG_NO_SET 0 +#define SPNIC_TX_OFFLOAD_MASK ( \ + SPNIC_TX_CKSUM_OFFLOAD_MASK | \ + RTE_MBUF_F_TX_VLAN) + +#define SPNIC_TX_CKSUM_OFFLOAD_MASK ( \ + RTE_MBUF_F_TX_IP_CKSUM | \ + RTE_MBUF_F_TX_TCP_CKSUM | \ + RTE_MBUF_F_TX_UDP_CKSUM | \ + RTE_MBUF_F_TX_SCTP_CKSUM | \ + RTE_MBUF_F_TX_OUTER_IP_CKSUM | \ + RTE_MBUF_F_TX_TCP_SEG) + /** * Get send queue free wqebb cnt * @@ -289,6 +301,518 @@ static int spnic_tx_done_cleanup(void *txq, u32 free_cnt) return spnic_xmit_mbuf_cleanup(tx_queue, try_free_cnt); } + +static inline int spnic_tx_offload_pkt_prepare(struct rte_mbuf *mbuf, + u16 *inner_l3_offset) +{ + uint64_t ol_flags = mbuf->ol_flags; + + /* Only support vxlan offload */ + if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) && + (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN))) + return -EINVAL; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (rte_validate_tx_offload(mbuf) != 0) + return -EINVAL; +#endif + if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN)) { + if ((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) || + (ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) || + (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { + /* + * For this senmatic, l2_len of mbuf means + * len(out_udp + vxlan + in_eth) + */ + *inner_l3_offset = mbuf->l2_len + mbuf->outer_l2_len + + mbuf->outer_l3_len; + } else { + /* + * For this senmatic, l2_len of mbuf means + * len(out_eth + out_ip + out_udp + vxlan + in_eth) + */ + *inner_l3_offset = mbuf->l2_len; + 
} + } else { + /* For non-tunnel type pkts */ + *inner_l3_offset = mbuf->l2_len; + } + + return 0; +} + +/** + * Set vlan offload info + * + * @param[in] task + * Send queue wqe task section + * @param[in] vlan_tag + * Vlan tag info + * @param[in] vlan_type + * Vlan type in hardware + */ +static inline void spnic_set_vlan_tx_offload(struct spnic_sq_task *task, + u16 vlan_tag, u8 vlan_type) +{ + task->vlan_offload = SQ_TASK_INFO3_SET(vlan_tag, VLAN_TAG) | + SQ_TASK_INFO3_SET(vlan_type, VLAN_TYPE) | + SQ_TASK_INFO3_SET(1U, VLAN_TAG_VALID); +} + +static inline int spnic_set_tx_offload(struct rte_mbuf *mbuf, + struct spnic_sq_task *task, + struct spnic_wqe_info *wqe_info) +{ + uint64_t ol_flags = mbuf->ol_flags; + u16 pld_offset = 0; + u32 queue_info = 0; + u16 vlan_tag; + + task->pkt_info0 = 0; + task->ip_identify = 0; + task->pkt_info2 = 0; + task->vlan_offload = 0; + + /* Vlan offload */ + if (unlikely(ol_flags & RTE_MBUF_F_TX_VLAN)) { + vlan_tag = mbuf->vlan_tci; + spnic_set_vlan_tx_offload(task, vlan_tag, SPNIC_TX_TPID0); + } + + if (!(ol_flags & SPNIC_TX_CKSUM_OFFLOAD_MASK)) + return 0; + + /* Tso offload */ + if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { + pld_offset = wqe_info->payload_offset; + if ((pld_offset >> 1) > MAX_PAYLOAD_OFFSET) + return -EINVAL; + + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, INNER_L4_EN); + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, INNER_L3_EN); + + queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, TSO); + queue_info |= SQ_CTRL_QUEUE_INFO_SET(pld_offset >> 1, PLDOFF); + + /* Set MSS value */ + queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(queue_info, MSS); + queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS); + } else { + if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, INNER_L3_EN); + + switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) { + case RTE_MBUF_F_TX_TCP_CKSUM: + case RTE_MBUF_F_TX_UDP_CKSUM: + case RTE_MBUF_F_TX_SCTP_CKSUM: + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, INNER_L4_EN); + + break; + + case RTE_MBUF_F_TX_L4_NO_CKSUM: + break; + + default: + PMD_DRV_LOG(INFO, "not support pkt type"); + return -EINVAL; + } + } + + /* For vxlan, also can support PKT_TX_TUNNEL_GRE, etc */ + switch (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) { + case RTE_MBUF_F_TX_TUNNEL_VXLAN: + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, TUNNEL_FLAG); + break; + + case 0: + break; + + default: + /* For non UDP/GRE tunneling, drop the tunnel packet */ + PMD_DRV_LOG(INFO, "not support tunnel pkt type"); + return -EINVAL; + } + + if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) + task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, OUT_L3_EN); + + wqe_info->queue_info = queue_info; + return 0; +} + +static inline bool spnic_is_tso_sge_valid(struct rte_mbuf *mbuf, + struct spnic_wqe_info *wqe_info) +{ + u32 total_len, limit_len, checked_len, left_len, adjust_mss; + u32 i, max_sges, left_sges, first_len; + struct rte_mbuf *mbuf_head, *mbuf_pre, *mbuf_first; + + left_sges = mbuf->nb_segs; + mbuf_head = mbuf; + mbuf_first = mbuf; + + /* tso sge number validation */ + if (unlikely(left_sges >= SPNIC_NONTSO_PKT_MAX_SGE)) { + checked_len = 0; + total_len = 0; + first_len = 0; + adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ? 
+ mbuf->tso_segsz : TX_MSS_MIN; + max_sges = SPNIC_NONTSO_PKT_MAX_SGE - 1; + limit_len = adjust_mss + wqe_info->payload_offset; + + for (i = 0; (i < max_sges) && (total_len < limit_len); i++) { + total_len += mbuf->data_len; + mbuf_pre = mbuf; + mbuf = mbuf->next; + } + + while (left_sges >= SPNIC_NONTSO_PKT_MAX_SGE) { + if (total_len >= limit_len) { + /* update the limit len */ + limit_len = adjust_mss; + /* update checked len */ + checked_len += first_len; + /* record the first len */ + first_len = mbuf_first->data_len; + /* first mbuf move to the next */ + mbuf_first = mbuf_first->next; + /* update total len */ + total_len -= first_len; + left_sges--; + i--; + for (; (i < max_sges) && + (total_len < limit_len); i++) { + total_len += mbuf->data_len; + mbuf_pre = mbuf; + mbuf = mbuf->next; + } + } else { + /* try to copy if not valid */ + checked_len += (total_len - mbuf_pre->data_len); + + left_len = mbuf_head->pkt_len - checked_len; + if (left_len > SPNIC_COPY_MBUF_SIZE) + return false; + wqe_info->sge_cnt = (u16)(mbuf_head->nb_segs + + i - left_sges); + wqe_info->cpy_mbuf_cnt = 1; + + return true; + } + } + } + + wqe_info->sge_cnt = mbuf_head->nb_segs; + return true; +} + +static inline int spnic_get_tx_offload(struct rte_mbuf *mbuf, + struct spnic_wqe_info *wqe_info) +{ + uint64_t ol_flags = mbuf->ol_flags; + u16 i, total_len, inner_l3_offset = 0; + struct rte_mbuf *mbuf_pkt = NULL; + int err; + + wqe_info->sge_cnt = mbuf->nb_segs; + if (!(ol_flags & SPNIC_TX_OFFLOAD_MASK)) { + wqe_info->offload = 0; + return 0; + } + + wqe_info->offload = 1; + err = spnic_tx_offload_pkt_prepare(mbuf, &inner_l3_offset); + if (err) + return err; + + /* non tso mbuf */ + if (likely(!(mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) { + if (unlikely(mbuf->pkt_len > MAX_SINGLE_SGE_SIZE)) + return -EINVAL; + + if (likely(SPNIC_NONTSO_SEG_NUM_VALID(mbuf->nb_segs))) + return 0; + + total_len = 0; + mbuf_pkt = mbuf; + for (i = 0; i < (SPNIC_NONTSO_PKT_MAX_SGE - 1); i++) { + total_len += mbuf_pkt->data_len; + mbuf_pkt = mbuf_pkt->next; + } + + if ((u32)(total_len + (u16)SPNIC_COPY_MBUF_SIZE) < + mbuf->pkt_len) + return -EINVAL; + + wqe_info->sge_cnt = SPNIC_NONTSO_PKT_MAX_SGE; + wqe_info->cpy_mbuf_cnt = 1; + return 0; + } + + /* tso mbuf */ + wqe_info->payload_offset = inner_l3_offset + mbuf->l3_len + + mbuf->l4_len; + + if (unlikely(SPNIC_TSO_SEG_NUM_INVALID(mbuf->nb_segs))) + return -EINVAL; + + if (unlikely(!spnic_is_tso_sge_valid(mbuf, wqe_info))) + return -EINVAL; + + return 0; +} + +static inline void spnic_set_buf_desc(struct spnic_sq_bufdesc *buf_descs, + rte_iova_t addr, u32 len) +{ + buf_descs->hi_addr = upper_32_bits(addr); + buf_descs->lo_addr = lower_32_bits(addr); + buf_descs->len = len; +} + +static inline void *spnic_copy_tx_mbuf(struct spnic_nic_dev *nic_dev, + struct rte_mbuf *mbuf, u16 sge_cnt) +{ + struct rte_mbuf *dst_mbuf; + u32 offset = 0; + u16 i; + + if (unlikely(!nic_dev->cpy_mpool)) + return NULL; + + dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool); + if (unlikely(!dst_mbuf)) + return NULL; + + dst_mbuf->data_off = 0; + dst_mbuf->data_len = 0; + for (i = 0; i < sge_cnt; i++) { + rte_memcpy((u8 *)dst_mbuf->buf_addr + offset, + (u8 *)mbuf->buf_addr + mbuf->data_off, + mbuf->data_len); + dst_mbuf->data_len += mbuf->data_len; + offset += mbuf->data_len; + mbuf = mbuf->next; + } + dst_mbuf->pkt_len = dst_mbuf->data_len; + return dst_mbuf; +} + +static int spnic_mbuf_dma_map_sge(struct spnic_txq *txq, struct rte_mbuf *mbuf, + struct spnic_sq_wqe_combo *wqe_combo, + struct spnic_wqe_info 
*wqe_info) +{ + struct spnic_sq_wqe_desc *wqe_desc = wqe_combo->hdr; + struct spnic_sq_bufdesc *buf_desc = wqe_combo->bds_head; + uint16_t nb_segs = wqe_info->sge_cnt - wqe_info->cpy_mbuf_cnt; + uint16_t real_segs = mbuf->nb_segs; + + rte_iova_t dma_addr; + u32 i; + + for (i = 0; i < nb_segs; i++) { + if (unlikely(mbuf == NULL)) { + txq->txq_stats.mbuf_null++; + return -EINVAL; + } + + if (unlikely(mbuf->data_len == 0)) { + txq->txq_stats.sge_len0++; + return -EINVAL; + } + + dma_addr = rte_mbuf_data_iova(mbuf); + if (i == 0) { + if (wqe_combo->wqe_type == SQ_WQE_COMPACT_TYPE && + mbuf->data_len > COMPACT_WQE_MAX_CTRL_LEN) { + txq->txq_stats.sge_len_too_large++; + return -EINVAL; + } + wqe_desc->hi_addr = upper_32_bits(dma_addr); + wqe_desc->lo_addr = lower_32_bits(dma_addr); + wqe_desc->ctrl_len = mbuf->data_len; + } else { + /* + * Parts of wqe is in sq bottom while parts + * of wqe is in sq head + */ + if (unlikely(wqe_info->wrapped && + (u64)buf_desc == txq->sq_bot_sge_addr)) + buf_desc = (struct spnic_sq_bufdesc *) + (void *)txq->sq_head_addr; + + spnic_set_buf_desc(buf_desc, dma_addr, mbuf->data_len); + buf_desc++; + } + + mbuf = mbuf->next; + } + + if (unlikely(wqe_info->cpy_mbuf_cnt != 0)) { + /* copy invalid mbuf segs to a valid buffer, lost performance */ + txq->txq_stats.cpy_pkts += 1; + mbuf = spnic_copy_tx_mbuf(txq->nic_dev, mbuf, + real_segs - nb_segs); + if (unlikely(!mbuf)) + return -EINVAL; + + txq->tx_info[wqe_info->pi].cpy_mbuf = mbuf; + + /* deal with the last mbuf */ + dma_addr = rte_mbuf_data_iova(mbuf); + if (unlikely(mbuf->data_len == 0)) { + txq->txq_stats.sge_len0++; + return -EINVAL; + } + + if (unlikely(wqe_info->wrapped && + ((u64)buf_desc == txq->sq_bot_sge_addr))) + buf_desc = + (struct spnic_sq_bufdesc *)txq->sq_head_addr; + + spnic_set_buf_desc(buf_desc, dma_addr, mbuf->data_len); + } + return 0; +} + +static inline void spnic_prepare_sq_ctrl(struct spnic_sq_wqe_combo *wqe_combo, + struct spnic_wqe_info *wqe_info) +{ + struct spnic_sq_wqe_desc *wqe_desc = wqe_combo->hdr; + + if (wqe_combo->wqe_type == SQ_WQE_COMPACT_TYPE) { + wqe_desc->ctrl_len |= SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | + SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | + SQ_CTRL_SET(wqe_info->owner, OWNER); + /* Compact wqe queue_info will transfer to ucode */ + wqe_desc->queue_info = 0; + return; + } + + wqe_desc->ctrl_len |= SQ_CTRL_SET(wqe_info->sge_cnt, BUFDESC_NUM) | + SQ_CTRL_SET(wqe_combo->task_type, TASKSECT_LEN) | + SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | + SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | + SQ_CTRL_SET(wqe_info->owner, OWNER); + + wqe_desc->queue_info = wqe_info->queue_info; + wqe_desc->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC); + + if (!SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS)) { + wqe_desc->queue_info |= + SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS); + } else if (SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS) < + TX_MSS_MIN) { + /* Mss should not less than 80 */ + wqe_desc->queue_info = + SQ_CTRL_QUEUE_INFO_CLEAR(wqe_desc->queue_info, MSS); + wqe_desc->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS); + } +} + +u16 spnic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts) +{ + struct spnic_txq *txq = tx_queue; + struct spnic_tx_info *tx_info = NULL; + struct rte_mbuf *mbuf_pkt = NULL; + struct spnic_sq_wqe_combo wqe_combo = {0}; + struct spnic_sq_wqe *sq_wqe = NULL; + struct spnic_wqe_info wqe_info = {0}; + u32 offload_err, free_cnt; + u64 tx_bytes = 0; + u16 free_wqebb_cnt, nb_tx; + int err; + + free_cnt = txq->tx_free_thresh; + /* 
Reclaim tx mbuf before xmit new packets */ + spnic_xmit_mbuf_cleanup(txq, free_cnt); + + /* Tx loop routine */ + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + mbuf_pkt = *tx_pkts++; + if (spnic_get_tx_offload(mbuf_pkt, &wqe_info)) { + txq->txq_stats.off_errs++; + break; + } + + if (!wqe_info.offload) + /* + * Use extended sq wqe with small TS, which can include + * multi sges, or compact sq normal wqe, which just + * supports one sge + */ + wqe_info.wqebb_cnt = mbuf_pkt->nb_segs; + else + /* Use extended sq wqe with normal TS */ + wqe_info.wqebb_cnt = mbuf_pkt->nb_segs + 1; + + free_wqebb_cnt = spnic_get_sq_free_wqebbs(txq); + if (unlikely(wqe_info.wqebb_cnt > free_wqebb_cnt)) { + /* Reclaim again */ + spnic_xmit_mbuf_cleanup(txq, free_cnt); + free_wqebb_cnt = spnic_get_sq_free_wqebbs(txq); + if (unlikely(wqe_info.wqebb_cnt > free_wqebb_cnt)) { + txq->txq_stats.tx_busy += (nb_pkts - nb_tx); + break; + } + } + + /* Get sq wqe address from wqe_page */ + sq_wqe = spnic_get_sq_wqe(txq, &wqe_info); + if (unlikely(!sq_wqe)) { + txq->txq_stats.tx_busy++; + break; + } + + /* Task or bd section maybe warpped for one wqe */ + spnic_set_wqe_combo(txq, &wqe_combo, sq_wqe, &wqe_info); + + wqe_info.queue_info = 0; + /* Fill tx packet offload into qsf and task field */ + if (wqe_info.offload) { + offload_err = spnic_set_tx_offload(mbuf_pkt, + wqe_combo.task, + &wqe_info); + if (unlikely(offload_err)) { + spnic_put_sq_wqe(txq, &wqe_info); + txq->txq_stats.off_errs++; + break; + } + } + + /* Fill sq_wqe buf_desc and bd_desc */ + err = spnic_mbuf_dma_map_sge(txq, mbuf_pkt, &wqe_combo, + &wqe_info); + if (err) { + spnic_put_sq_wqe(txq, &wqe_info); + txq->txq_stats.off_errs++; + break; + } + + /* Record tx info */ + tx_info = &txq->tx_info[wqe_info.pi]; + tx_info->mbuf = mbuf_pkt; + tx_info->wqebb_cnt = wqe_info.wqebb_cnt; + + spnic_prepare_sq_ctrl(&wqe_combo, &wqe_info); + + spnic_write_db(txq->db_addr, txq->q_id, txq->cos, SQ_CFLAG_DP, + MASKED_QUEUE_IDX(txq, txq->prod_idx)); + + tx_bytes += mbuf_pkt->pkt_len; + } + + /* Update txq stats */ + if (nb_tx) { + txq->txq_stats.packets += nb_tx; + txq->txq_stats.bytes += tx_bytes; + } + txq->txq_stats.burst_pkts = nb_tx; + + return nb_tx; +} + int spnic_stop_sq(struct spnic_txq *txq) { struct spnic_nic_dev *nic_dev = txq->nic_dev; diff --git a/drivers/net/spnic/spnic_tx.h b/drivers/net/spnic/spnic_tx.h index d770b15c21..4c2d587104 100644 --- a/drivers/net/spnic/spnic_tx.h +++ b/drivers/net/spnic/spnic_tx.h @@ -4,6 +4,13 @@ #ifndef _SPNIC_TX_H_ #define _SPNIC_TX_H_ +#define MAX_SINGLE_SGE_SIZE 65536 +#define SPNIC_NONTSO_PKT_MAX_SGE 38 +#define SPNIC_NONTSO_SEG_NUM_VALID(num) \ + ((num) <= SPNIC_NONTSO_PKT_MAX_SGE) + +#define SPNIC_TSO_PKT_MAX_SGE 127 +#define SPNIC_TSO_SEG_NUM_INVALID(num) ((num) > SPNIC_TSO_PKT_MAX_SGE) /* Tx offload info */ struct spnic_tx_offload_info { u8 outer_l2_len; -- 2.27.0
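
A usage note for reviewers trying the Tx path: the sketch below shows how
an application would request the checksum and VLAN-insertion offloads that
spnic_set_tx_offload() turns into wqe task fields. It is illustrative only;
the port/queue ids, header layout and VLAN id are assumptions, not values
taken from the driver.

#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

/* Sketch only: mark an IPv4/TCP mbuf for IP+TCP checksum offload and
 * VLAN insertion before handing it to spnic_xmit_pkts() through
 * rte_eth_tx_burst(). Port 0 / queue 0 are illustrative.
 */
static void example_tx_csum_vlan(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
		       RTE_MBUF_F_TX_TCP_CKSUM;

	/* spnic_set_tx_offload() inserts this tag using SPNIC_TX_TPID0 */
	m->ol_flags |= RTE_MBUF_F_TX_VLAN;
	m->vlan_tci = 100;

	(void)rte_eth_tx_burst(0, 0, &m, 1);
}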
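
A similar sketch for TSO as parsed by spnic_get_tx_offload(): for
non-tunnel packets the driver derives payload_offset from
l2_len + l3_len + l4_len and programs the MSS from tso_segsz, so the
application only fills the header lengths and segment size. The 1448-byte
MSS is an example value.

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

/* Sketch only: a TCP segmentation (TSO) request on a plain IPv4/TCP
 * mbuf chain. The driver clamps an MSS below TX_MSS_MIN and falls back
 * to TX_MSS_DEFAULT when the MSS field is zero.
 */
static void example_tx_tso(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->l4_len = sizeof(struct rte_tcp_hdr);
	m->tso_segsz = 1448;
	m->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
		       RTE_MBUF_F_TX_TCP_SEG;
}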
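
The copy-mempool fallback in spnic_mbuf_dma_map_sge() only has to run when
a non-TSO packet carries more than SPNIC_NONTSO_PKT_MAX_SGE (38) segments,
and it costs an extra memcpy into a SPNIC_COPY_MBUF_SIZE (4 KB) buffer. As
a hedged application-side alternative, oversized chains can be linearized
before transmit, assuming the first segment has enough tailroom:

#include <rte_mbuf.h>

/* Sketch only: keep non-TSO packets within the 38-SGE limit enforced
 * by spnic_get_tx_offload(). rte_pktmbuf_linearize() succeeds only if
 * the first segment has room for the whole payload.
 */
static int example_limit_tx_segs(struct rte_mbuf *m)
{
	if (m->nb_segs > 38)
		return rte_pktmbuf_linearize(m); /* 0 on success, -1 otherwise */

	return 0;
}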
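
On the receive side, spnic_recv_pkts() reports checksum, VLAN and RSS
results only through mbuf ol_flags, so applications inspect each mbuf after
rte_eth_rx_burst(). A minimal sketch, assuming port 0, queue 0 and a burst
of 32:

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch only: consume the flags filled in by spnic_rx_csum(),
 * spnic_rx_vlan() and spnic_rx_rss_hash().
 */
static void example_rx_poll(void)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, i;

	nb_rx = rte_eth_rx_burst(0, 0, pkts, 32);
	for (i = 0; i < nb_rx; i++) {
		struct rte_mbuf *m = pkts[i];

		if (m->ol_flags & (RTE_MBUF_F_RX_IP_CKSUM_BAD |
				   RTE_MBUF_F_RX_L4_CKSUM_BAD)) {
			rte_pktmbuf_free(m); /* bad checksum reported by hw */
			continue;
		}

		if (m->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)
			printf("stripped vlan tci %u\n", m->vlan_tci);

		if (m->ol_flags & RTE_MBUF_F_RX_RSS_HASH)
			printf("rss hash 0x%x\n", m->hash.rss);

		rte_pktmbuf_free(m);
	}
}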
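
The new helpers in spnic_rx.h are plain shift-and-mask accessors over the
CQE words. A short illustration with made-up values, assumed to be compiled
inside the driver where spnic_rx.h and the u16/u32 typedefs are visible:

#include "spnic_rx.h"

/* Sketch only: decode example CQE words with the new helpers.
 * 0x80000000 sets only the RXDONE bit (bit 31, no checksum errors);
 * the vlan_len word packs a 66-byte length (bits 16..31) with
 * VLAN tag 100 (bits 0..15).
 */
static void example_decode_cqe(void)
{
	u32 status = 0x80000000;
	u32 vlan_len = (66U << 16) | 100U;

	if (SPNIC_GET_RX_DONE(status)) {
		u32 pkt_len = SPNIC_GET_RX_PKT_LEN(vlan_len);	/* 66 */
		u16 vlan_tag = SPNIC_GET_RX_VLAN_TAG(vlan_len);	/* 100 */
		u32 csum_err = SPNIC_GET_RX_CSUM_ERR(status);	/* 0 */

		(void)pkt_len;
		(void)vlan_tag;
		(void)csum_err;
	}
}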