From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 00953A04F3; Thu, 2 Jan 2020 18:59:27 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 56CE81C1A6; Thu, 2 Jan 2020 18:59:27 +0100 (CET) Received: from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com [67.231.148.174]) by dpdk.org (Postfix) with ESMTP id 1152D1C1A3 for ; Thu, 2 Jan 2020 18:59:25 +0100 (CET) Received: from pps.filterd (m0045849.ppops.net [127.0.0.1]) by mx0a-0016f401.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id 002HurIW028769 for ; Thu, 2 Jan 2020 09:59:25 -0800 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : mime-version : content-type; s=pfpt0818; bh=mVjPPi0Tj/4aT6QXaE0tZxx2oK/l/zb20IVrO8Y6cUo=; b=xXrs7KR5/rxYl5IgYSY/orE1KZTW8yU2MR2G6ECy3VrmKjy7s3hlTLOEuVUK0d+GFojq T98YMQIJ4+UMJ0raHwCkofRHaV0AMprhiSDIlVUR38EqfMgx6vVyMQ0C3KNVBFfzw+BN X+BJq5EwrEdCaxoNVfyg0bbE8KsMl/rkiAplMa/P1JkYcKSBxG3loQ7wWhgKdXz8QE9T mLcWueisfDmomkYDJpZKn33Gxhjuh1Ba4ODls5UYBMuBBNfawauNYD//+vgyqQpXszHD 64StvgfUXMyZ1FE2eJxIunQHGZWP+bQQBRLK5+5IlmnlVBnGL4tSEUbjQj6hWLThTv7p Rg== Received: from sc-exch01.marvell.com ([199.233.58.181]) by mx0a-0016f401.pphosted.com with ESMTP id 2x659vpbvc-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Thu, 02 Jan 2020 09:59:25 -0800 Received: from SC-EXCH03.marvell.com (10.93.176.83) by SC-EXCH01.marvell.com (10.93.176.81) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Thu, 2 Jan 2020 09:59:21 -0800 Received: from maili.marvell.com (10.93.176.43) by SC-EXCH03.marvell.com (10.93.176.83) with Microsoft SMTP Server id 15.0.1497.2 via Frontend Transport; Thu, 2 Jan 2020 09:59:20 -0800 Received: from dut1171.mv.qlogic.com (unknown [10.112.88.18]) by maili.marvell.com (Postfix) with ESMTP id B8E6A3F703F; Thu, 2 Jan 2020 09:59:20 -0800 (PST) Received: from 
dut1171.mv.qlogic.com (localhost [127.0.0.1]) by dut1171.mv.qlogic.com (8.14.7/8.14.7) with ESMTP id 002HxK9l009601; Thu, 2 Jan 2020 09:59:20 -0800 Received: (from root@localhost) by dut1171.mv.qlogic.com (8.14.7/8.14.7/Submit) id 002HxKeY009600; Thu, 2 Jan 2020 09:59:20 -0800 From: Shahed Shaikh To: CC: , , Date: Thu, 2 Jan 2020 09:59:02 -0800 Message-ID: <20200102175903.9556-1-shshaikh@marvell.com> X-Mailer: git-send-email 2.12.0 MIME-Version: 1.0 Content-Type: text/plain X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95,18.0.572 definitions=2020-01-02_05:2020-01-02,2020-01-02 signatures=0 Subject: [dpdk-dev] [PATCH 1/2] net/qede: enhance receive data path CPU utilization X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Use a lightweight receive packet handler for non-LRO and non-scatter packets to improve CPU utilization in the receive data path. We achieved a ~18% improvement in CPU cycles using this handler. 
Signed-off-by: Shahed Shaikh --- drivers/net/qede/qede_ethdev.c | 36 ++++-- drivers/net/qede/qede_rxtx.c | 222 +++++++++++++++++++++++++++++++++ drivers/net/qede/qede_rxtx.h | 4 +- 3 files changed, 253 insertions(+), 9 deletions(-) diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 8064735db..47e90096a 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -267,6 +267,29 @@ qede_interrupt_handler(void *param) DP_ERR(edev, "rte_intr_ack failed\n"); } +static void +qede_assign_rxtx_handlers(struct rte_eth_dev *dev) +{ + struct qede_dev *qdev = dev->data->dev_private; + struct ecore_dev *edev = &qdev->edev; + + if (ECORE_IS_CMT(edev)) { + dev->rx_pkt_burst = qede_recv_pkts_cmt; + dev->tx_pkt_burst = qede_xmit_pkts_cmt; + return; + } + + if (dev->data->lro || dev->data->scattered_rx) { + DP_INFO(edev, "Assigning qede_recv_pkts\n"); + dev->rx_pkt_burst = qede_recv_pkts; + } else { + DP_INFO(edev, "Assigning qede_recv_pkts_regular\n"); + dev->rx_pkt_burst = qede_recv_pkts_regular; + } + + dev->tx_pkt_burst = qede_xmit_pkts; +} + static void qede_alloc_etherdev(struct qede_dev *qdev, struct qed_dev_eth_info *info) { @@ -1064,6 +1087,7 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev) /* Start/resume traffic */ qede_fastpath_start(edev); + qede_assign_rxtx_handlers(eth_dev); DP_INFO(edev, "Device started\n"); return 0; @@ -1951,6 +1975,7 @@ qede_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev) }; if (eth_dev->rx_pkt_burst == qede_recv_pkts || + eth_dev->rx_pkt_burst == qede_recv_pkts_regular || eth_dev->rx_pkt_burst == qede_recv_pkts_cmt) return ptypes; @@ -2242,7 +2267,9 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) /* update max frame size */ dev->data->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len; + /* Reassign back */ + qede_assign_rxtx_handlers(dev); if (ECORE_IS_CMT(edev)) { dev->rx_pkt_burst = qede_recv_pkts_cmt; dev->tx_pkt_burst = qede_xmit_pkts_cmt; @@ -2447,14 
+2474,7 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf) strncpy((char *)params.name, QEDE_PMD_VER_PREFIX, QEDE_PMD_DRV_VER_STR_SIZE); - if (ECORE_IS_CMT(edev)) { - eth_dev->rx_pkt_burst = qede_recv_pkts_cmt; - eth_dev->tx_pkt_burst = qede_xmit_pkts_cmt; - } else { - eth_dev->rx_pkt_burst = qede_recv_pkts; - eth_dev->tx_pkt_burst = qede_xmit_pkts; - } - + qede_assign_rxtx_handlers(eth_dev); eth_dev->tx_pkt_prepare = qede_xmit_prep_pkts; /* For CMT mode device do periodic polling for slowpath events. diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c index dbb74fc64..3b486a0a4 100644 --- a/drivers/net/qede/qede_rxtx.c +++ b/drivers/net/qede/qede_rxtx.c @@ -1518,6 +1518,228 @@ print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq, } #endif +uint16_t +qede_recv_pkts_regular(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL; + register struct rte_mbuf *rx_mb = NULL; + struct qede_rx_queue *rxq = p_rxq; + struct qede_dev *qdev = rxq->qdev; + struct ecore_dev *edev = &qdev->edev; + union eth_rx_cqe *cqe; + uint64_t ol_flags; + enum eth_rx_cqe_type cqe_type; + int rss_enable = qdev->rss_enable; + int rx_alloc_count = 0; + uint32_t packet_type; + uint32_t rss_hash; + uint16_t vlan_tci, port_id; + uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index, num_rx_bds; + uint16_t rx_pkt = 0; + uint16_t pkt_len = 0; + uint16_t len; /* Length of first BD */ + uint16_t preload_idx; + uint16_t parse_flag; +#ifdef RTE_LIBRTE_QEDE_DEBUG_RX + uint8_t bitfield_val; +#endif + uint8_t offset, flags, bd_num; + + + /* Allocate buffers that we used in previous loop */ + if (rxq->rx_alloc_count) { + if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, + rxq->rx_alloc_count))) { + struct rte_eth_dev *dev; + + PMD_RX_LOG(ERR, rxq, + "New buffer allocation failed," + "dropping incoming packetn"); + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed += + rxq->rx_alloc_count; 
+ rxq->rx_alloc_errors += rxq->rx_alloc_count; + return 0; + } + qede_update_rx_prod(qdev, rxq); + rxq->rx_alloc_count = 0; + } + + hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); + sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); + + rte_rmb(); + + if (hw_comp_cons == sw_comp_cons) + return 0; + + num_rx_bds = NUM_RX_BDS(rxq); + port_id = rxq->port_id; + + while (sw_comp_cons != hw_comp_cons) { + ol_flags = 0; + packet_type = RTE_PTYPE_UNKNOWN; + vlan_tci = 0; + rss_hash = 0; + + /* Get the CQE from the completion ring */ + cqe = + (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring); + cqe_type = cqe->fast_path_regular.type; + PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type); + + if (likely(cqe_type == ETH_RX_CQE_TYPE_REGULAR)) { + fp_cqe = &cqe->fast_path_regular; + } else { + if (cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH) { + PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n"); + ecore_eth_cqe_completion + (&edev->hwfns[rxq->queue_id % + edev->num_hwfns], + (struct eth_slow_path_rx_cqe *)cqe); + } + goto next_cqe; + } + + /* Get the data from the SW ring */ + sw_rx_index = rxq->sw_rx_cons & num_rx_bds; + rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf; + assert(rx_mb != NULL); + + parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags); + offset = fp_cqe->placement_offset; + len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd); + pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len); + vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag); + rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash); + bd_num = fp_cqe->bd_num; +#ifdef RTE_LIBRTE_QEDE_DEBUG_RX + bitfield_val = fp_cqe->bitfields; +#endif + + if (unlikely(qede_tunn_exist(parse_flag))) { + PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n"); + if (unlikely(qede_check_tunn_csum_l4(parse_flag))) { + PMD_RX_LOG(ERR, rxq, + "L4 csum failed, flags = 0x%x\n", + parse_flag); + rxq->rx_hw_errors++; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } else { + ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + if 
(unlikely(qede_check_tunn_csum_l3(parse_flag))) { + PMD_RX_LOG(ERR, rxq, + "Outer L3 csum failed, flags = 0x%x\n", + parse_flag); + rxq->rx_hw_errors++; + ol_flags |= PKT_RX_EIP_CKSUM_BAD; + } else { + ol_flags |= PKT_RX_IP_CKSUM_GOOD; + } + + flags = fp_cqe->tunnel_pars_flags.flags; + + /* Tunnel_type */ + packet_type = + qede_rx_cqe_to_tunn_pkt_type(flags); + + /* Inner header */ + packet_type |= + qede_rx_cqe_to_pkt_type_inner(parse_flag); + + /* Outer L3/L4 types is not available in CQE */ + packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); + + /* Outer L3/L4 types is not available in CQE. + * Need to add offset to parse correctly, + */ + rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; + packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); + } else { + packet_type |= qede_rx_cqe_to_pkt_type(parse_flag); + } + + /* Common handling for non-tunnel packets and for inner + * headers in the case of tunnel. + */ + if (unlikely(qede_check_notunn_csum_l4(parse_flag))) { + PMD_RX_LOG(ERR, rxq, + "L4 csum failed, flags = 0x%x\n", + parse_flag); + rxq->rx_hw_errors++; + ol_flags |= PKT_RX_L4_CKSUM_BAD; + } else { + ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) { + PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n", + parse_flag); + rxq->rx_hw_errors++; + ol_flags |= PKT_RX_IP_CKSUM_BAD; + } else { + ol_flags |= PKT_RX_IP_CKSUM_GOOD; + } + + if (unlikely(CQE_HAS_VLAN(parse_flag) || + CQE_HAS_OUTER_VLAN(parse_flag))) { + /* Note: FW doesn't indicate Q-in-Q packet */ + ol_flags |= PKT_RX_VLAN; + if (qdev->vlan_strip_flg) { + ol_flags |= PKT_RX_VLAN_STRIPPED; + rx_mb->vlan_tci = vlan_tci; + } + } + + if (rss_enable) { + ol_flags |= PKT_RX_RSS_HASH; + rx_mb->hash.rss = rss_hash; + } + + rx_alloc_count++; + qede_rx_bd_ring_consume(rxq); + + /* Prefetch next mbuf while processing current one. 
*/ + preload_idx = rxq->sw_rx_cons & num_rx_bds; + rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf); + + /* Update rest of the MBUF fields */ + rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; + rx_mb->port = port_id; + rx_mb->ol_flags = ol_flags; + rx_mb->data_len = len; + rx_mb->packet_type = packet_type; +#ifdef RTE_LIBRTE_QEDE_DEBUG_RX + print_rx_bd_info(rx_mb, rxq, bitfield_val); +#endif + rx_mb->nb_segs = bd_num; + rx_mb->pkt_len = pkt_len; + + rx_pkts[rx_pkt] = rx_mb; + rx_pkt++; + +next_cqe: + ecore_chain_recycle_consumed(&rxq->rx_comp_ring); + sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); + if (rx_pkt == nb_pkts) { + PMD_RX_LOG(DEBUG, rxq, + "Budget reached nb_pkts=%u received=%u", + rx_pkt, nb_pkts); + break; + } + } + + /* Request the number of buffers to be allocated in the next loop */ + rxq->rx_alloc_count = rx_alloc_count; + + rxq->rcv_pkts += rx_pkt; + rxq->rx_segs += rx_pkt; + PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id()); + + return rx_pkt; +} + uint16_t qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h index 75cc930fd..a4c634e88 100644 --- a/drivers/net/qede/qede_rxtx.h +++ b/drivers/net/qede/qede_rxtx.h @@ -283,7 +283,9 @@ uint16_t qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t qede_recv_pkts_cmt(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); - +uint16_t +qede_recv_pkts_regular(void *p_rxq, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t qede_rxtx_pkts_dummy(void *p_rxq, struct rte_mbuf **pkts, uint16_t nb_pkts); -- 2.17.1