From mboxrd@z Thu Jan 1 00:00:00 1970
Subject: Re: [Patch v6 15/18] net/mana: add function to send packets
From: fengchengwen
To: Ferruh Yigit
Cc: Ajay Sharma, Stephen Hemminger
Date: Fri, 2 Sep 2022 20:18:49 +0800
In-Reply-To: <1661899911-13086-16-git-send-email-longli@linuxonhyperv.com>
References: <1661899911-13086-1-git-send-email-longli@linuxonhyperv.com>
 <1661899911-13086-16-git-send-email-longli@linuxonhyperv.com>
List-Id: DPDK patches and discussions

On 2022/8/31 6:51, longli@linuxonhyperv.com wrote:
> From: Long Li
>
> With all the TX queues created, MANA can send packets over those queues.
>
> Signed-off-by: Long Li

...
> }
> +
> +uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
> +		       uint16_t nb_pkts)
> +{
> +	struct mana_txq *txq = dpdk_txq;
> +	struct mana_priv *priv = txq->priv;
> +	struct gdma_comp comp;
> +	int ret;
> +	void *db_page;
> +
> +	/* Process send completions from GDMA */
> +	while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
> +		struct mana_txq_desc *desc =
> +			&txq->desc_ring[txq->desc_ring_tail];
> +		struct mana_tx_comp_oob *oob =
> +			(struct mana_tx_comp_oob *)&comp.completion_data[0];
> +
> +		if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
> +			DRV_LOG(ERR,
> +				"mana_tx_comp_oob cqe_type %u vendor_err %u",
> +				oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
> +			txq->stats.errors++;
> +		} else {
> +			DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
> +			txq->stats.packets++;
> +		}
> +
> +		if (!desc->pkt) {
> +			DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
> +		} else {
> +			txq->stats.bytes += desc->pkt->data_len;
> +			rte_pktmbuf_free(desc->pkt);
> +		}
> +
> +		desc->pkt = NULL;
> +		txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
> +		txq->gdma_sq.tail += desc->wqe_size_in_bu;
> +	}
> +
> +	/* Post send requests to GDMA */
> +	uint16_t pkt_idx;
> +
> +	for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
> +		struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
> +		struct rte_mbuf *m_seg = m_pkt;
> +		struct transmit_oob_v2 tx_oob = {0};
> +		struct one_sgl sgl = {0};
> +
> +		/* Drop the packet if it exceeds max segments */
> +		if (m_pkt->nb_segs > priv->max_send_sge) {
> +			DRV_LOG(ERR, "send packet segments %d exceeding max",
> +				m_pkt->nb_segs);

This branch violates the rte_eth_tx_burst() definition: the return value is
documented as the number of packets actually transmitted, so a packet must not
be silently skipped with "continue" while still being counted as sent; the
application will then neither retry nor free that mbuf. I also notice the
driver doesn't implement tx_prepare, which is where the framework expects this
kind of per-packet validation (see the tx_prepare sketch at the end of this
mail).

> +			continue;
> +		}
> +
> +		/* Fill in the oob */
> +		tx_oob.short_oob.packet_format = short_packet_format;
> +		tx_oob.short_oob.tx_is_outer_ipv4 =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
> +		tx_oob.short_oob.tx_is_outer_ipv6 =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
> +
> +		tx_oob.short_oob.tx_compute_IP_header_checksum =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ?
> +			1 : 0;
> +
> +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> +				RTE_MBUF_F_TX_TCP_CKSUM) {
> +			struct rte_tcp_hdr *tcp_hdr;
> +
> +			/* HW needs partial TCP checksum */
> +
> +			tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +					struct rte_tcp_hdr *,
> +					m_pkt->l2_len + m_pkt->l3_len);
> +
> +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> +				struct rte_ipv4_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv4_hdr *,
> +						m_pkt->l2_len);
> +				tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
> +							m_pkt->ol_flags);
> +
> +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> +				struct rte_ipv6_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv6_hdr *,
> +						m_pkt->l2_len);
> +				tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
> +							m_pkt->ol_flags);
> +			} else {
> +				DRV_LOG(ERR, "Invalid input for TCP CKSUM");
> +			}
> +
> +			tx_oob.short_oob.tx_compute_TCP_checksum = 1;
> +			tx_oob.short_oob.tx_transport_header_offset =
> +				m_pkt->l2_len + m_pkt->l3_len;
> +		}
> +
> +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> +				RTE_MBUF_F_TX_UDP_CKSUM) {
> +			struct rte_udp_hdr *udp_hdr;
> +
> +			/* HW needs partial UDP checksum */
> +			udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +					struct rte_udp_hdr *,
> +					m_pkt->l2_len + m_pkt->l3_len);
> +
> +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> +				struct rte_ipv4_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv4_hdr *,
> +						m_pkt->l2_len);
> +
> +				udp_hdr->dgram_cksum =
> +					rte_ipv4_phdr_cksum(ip_hdr,
> +							    m_pkt->ol_flags);
> +
> +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> +				struct rte_ipv6_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv6_hdr *,
> +						m_pkt->l2_len);
> +
> +				udp_hdr->dgram_cksum =
> +					rte_ipv6_phdr_cksum(ip_hdr,
> +							    m_pkt->ol_flags);
> +
> +			} else {
> +				DRV_LOG(ERR, "Invalid input for UDP CKSUM");
> +			}
> +
> +			tx_oob.short_oob.tx_compute_UDP_checksum = 1;
> +		}

Why is there no TSO handling here? TSO support was reported in dev_info_get,
so this path should also handle RTE_MBUF_F_TX_TCP_SEG, for example along the
lines of the sketch below.
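To make that concrete, here is a rough sketch of the shape such a branch could
take. The mbuf side (RTE_MBUF_F_TX_TCP_SEG, tso_segsz, pseudo-header checksum
without the length) is standard DPDK; any LSO field on the tx_oob side (the
long_oob.mss below) is only a placeholder, since I don't know the MANA OOB
layout for LSO:

	/* Sketch only: handle LSO requests from the application */
	if (m_pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		struct rte_tcp_hdr *tcp_hdr;

		tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
				struct rte_tcp_hdr *,
				m_pkt->l2_len + m_pkt->l3_len);

		/* For TSO the pseudo-header checksum must not include the
		 * payload length; rte_ipv4_phdr_cksum()/rte_ipv6_phdr_cksum()
		 * already omit it when RTE_MBUF_F_TX_TCP_SEG is set.
		 */
		if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
			struct rte_ipv4_hdr *ip_hdr;

			ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
					struct rte_ipv4_hdr *, m_pkt->l2_len);
			tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
							     m_pkt->ol_flags);
		} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
			struct rte_ipv6_hdr *ip_hdr;

			ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
					struct rte_ipv6_hdr *, m_pkt->l2_len);
			tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
							     m_pkt->ol_flags);
		}

		tx_oob.short_oob.tx_compute_TCP_checksum = 1;
		tx_oob.short_oob.tx_transport_header_offset =
			m_pkt->l2_len + m_pkt->l3_len;
		/* Hypothetical: the LSO OOB will also need the MSS, e.g.
		 * tx_oob.long_oob.mss = m_pkt->tso_segsz;
		 */
	}

If LSO can't be handled in this version, it probably shouldn't be advertised
in dev_info_get.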
> +
> +		tx_oob.short_oob.suppress_tx_CQE_generation = 0;
> +		tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;
> +
> +		tx_oob.short_oob.VSQ_frame_num =
> +			get_vsq_frame_num(txq->gdma_sq.id);
> +		tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;
> +
> +		DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
> +			tx_oob.short_oob.packet_format,
> +			tx_oob.short_oob.tx_is_outer_ipv4,
> +			tx_oob.short_oob.tx_is_outer_ipv6);
> +
> +		DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
> +			tx_oob.short_oob.tx_compute_IP_header_checksum,
> +			tx_oob.short_oob.tx_compute_TCP_checksum,
> +			tx_oob.short_oob.tx_compute_UDP_checksum,
> +			tx_oob.short_oob.tx_transport_header_offset);
> +
> +		DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
> +			pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
> +			m_pkt->pkt_len);
> +
> +		/* Create SGL for packet data buffers */
> +		for (uint16_t seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
> +			struct mana_mr_cache *mr =
> +				mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);
> +
> +			if (!mr) {
> +				DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
> +					pkt_idx);
> +				return pkt_idx;
> +			}
> +
> +			sgl.gdma_sgl[seg_idx].address =
> +				rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
> +								  uint64_t));
> +			sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
> +			sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;
> +
> +			DRV_LOG(DEBUG,
> +				"seg idx %u addr 0x%" PRIx64 " size %x key %x",
> +				seg_idx, sgl.gdma_sgl[seg_idx].address,
> +				sgl.gdma_sgl[seg_idx].size,
> +				sgl.gdma_sgl[seg_idx].memory_key);
> +
> +			m_seg = m_seg->next;
> +		}
> +
> +		struct gdma_work_request work_req = {0};
> +		struct gdma_posted_wqe_info wqe_info = {0};
> +
> +		work_req.gdma_header.struct_size = sizeof(work_req);
> +		wqe_info.gdma_header.struct_size = sizeof(wqe_info);
> +
> +		work_req.sgl = sgl.gdma_sgl;
> +		work_req.num_sgl_elements = m_pkt->nb_segs;
> +		work_req.inline_oob_size_in_bytes =
> +			sizeof(struct transmit_short_oob_v2);
> +		work_req.inline_oob_data = &tx_oob;
> +		work_req.flags = 0;
> +		work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;
> +
> +		ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
> +					     &wqe_info);
> +		if (!ret) {
> +			struct mana_txq_desc *desc =
> +				&txq->desc_ring[txq->desc_ring_head];
> +
> +			/* Update queue for tracking pending requests */
> +			desc->pkt = m_pkt;
> +			desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
> +			txq->desc_ring_head =
> +				(txq->desc_ring_head + 1) % txq->num_desc;
> +
> +			DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
> +				nb_pkts, pkt_idx);
> +		} else {
> +			DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
> +				pkt_idx, ret);
> +			break;
> +		}
> +	}
> +
> +	/* Ring hardware door bell */
> +	db_page = priv->db_page;
> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +		struct rte_eth_dev *dev =
> +			&rte_eth_devices[priv->dev_data->port_id];
> +		struct mana_process_priv *process_priv = dev->process_private;
> +
> +		db_page = process_priv->db_page;
> +	}
> +
> +	ret = mana_ring_doorbell(db_page, gdma_queue_send,
> +				 txq->gdma_sq.id,
> +				 txq->gdma_sq.head *
> +				 GDMA_WQE_ALIGNMENT_UNIT_SIZE);
> +	if (ret)
> +		DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
> +
> +	return pkt_idx;
> +}
>
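Regarding the missing tx_prepare mentioned earlier, a minimal sketch of what
it could look like for the nb_segs check is below. mana_tx_prepare is a name I
made up; mana_txq, mana_priv and max_send_sge follow this patch, and "mana.h"
stands in for the driver's own header. Treat it as an illustration, not a
tested implementation:

#include <errno.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

#include "mana.h"	/* assumed driver header for mana_txq/mana_priv */

/* Sketch of a tx_prepare callback matching eth_tx_prep_t */
static uint16_t
mana_tx_prepare(void *dpdk_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct mana_txq *txq = dpdk_txq;
	struct mana_priv *priv = txq->priv;
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		/* Refuse packets the TX path cannot fit into one WQE */
		if (tx_pkts[i]->nb_segs > priv->max_send_sge) {
			rte_errno = EINVAL;
			break;
		}
	}

	/* Number of packets that are safe to pass to rte_eth_tx_burst() */
	return i;
}

Hooked up through dev->tx_pkt_prepare, applications can then call
rte_eth_tx_prepare() before rte_eth_tx_burst(), and the burst function no
longer needs to drop such packets silently.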