From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx0b-0016ce01.pphosted.com (mx0a-0016ce01.pphosted.com [67.231.148.157]) by dpdk.org (Postfix) with ESMTP id 40C999611 for ; Fri, 1 Apr 2016 02:15:55 +0200 (CEST) Received: from pps.filterd (m0095336.ppops.net [127.0.0.1]) by mx0b-0016ce01.pphosted.com (8.16.0.11/8.16.0.11) with SMTP id u310DtE3014430 for ; Thu, 31 Mar 2016 17:15:54 -0700 Received: from avcashub1.qlogic.com ([198.186.0.117]) by mx0a-0016ce01.pphosted.com with ESMTP id 2203pc4yvv-1 (version=TLSv1 cipher=AES128-SHA bits=128 verify=NOT) for ; Thu, 31 Mar 2016 17:15:54 -0700 Received: from avluser05.qlc.com (10.1.113.115) by qlc.com (10.1.4.192) with Microsoft SMTP Server id 14.3.235.1; Thu, 31 Mar 2016 17:15:53 -0700 Received: (from rmody@localhost) by avluser05.qlc.com (8.14.4/8.14.4/Submit) id u310FrSo003403; Thu, 31 Mar 2016 17:15:53 -0700 X-Authentication-Warning: avluser05.qlc.com: rmody set sender to rasesh.mody@qlogic.com using -f From: Rasesh Mody To: CC: , Rasesh Mody , Harish Patil Date: Thu, 31 Mar 2016 17:15:23 -0700 Message-ID: <1459469725-3333-2-git-send-email-rasesh.mody@qlogic.com> X-Mailer: git-send-email 1.7.10.3 In-Reply-To: <1459469725-3333-1-git-send-email-rasesh.mody@qlogic.com> References: <1459469725-3333-1-git-send-email-rasesh.mody@qlogic.com> MIME-Version: 1.0 Content-Type: text/plain disclaimer: bypass X-Proofpoint-Virus-Version: vendor=nai engine=5800 definitions=8121 signatures=670706 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 priorityscore=1501 suspectscore=4 phishscore=0 bulkscore=0 spamscore=0 clxscore=1015 impostorscore=0 lowpriorityscore=0 adultscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1603180000 definitions=main-1604010001 Subject: [dpdk-dev] [PATCH v2 2/4] bnx2x: Fix Tx Performance X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 01 Apr 2016 00:15:55 -0000 Change the Tx routine logic to ring the doorbell once per burst and not on every Tx packet. This driver-level optimization is necessary to achieve line rates for larger frame sizes (1k or more). Fixes: 540a211084a7 ("bnx2x: driver core") Signed-off-by: Harish Patil Signed-off-by: Rasesh Mody --- drivers/net/bnx2x/bnx2x.c | 207 ++++++++++++++++++---------------------- drivers/net/bnx2x/bnx2x.h | 4 +- drivers/net/bnx2x/bnx2x_rxtx.c | 45 ++++----- 3 files changed, 118 insertions(+), 138 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c index 6edb2f9..149fdef 100644 --- a/drivers/net/bnx2x/bnx2x.c +++ b/drivers/net/bnx2x/bnx2x.c @@ -1293,7 +1293,7 @@ bnx2x_free_tx_pkt(__rte_unused struct bnx2x_fastpath *fp, struct bnx2x_tx_queue struct rte_mbuf *tx_mbuf = txq->sw_ring[TX_BD(pkt_idx, txq)]; if (likely(tx_mbuf != NULL)) { - rte_pktmbuf_free(tx_mbuf); + rte_pktmbuf_free_seg(tx_mbuf); } else { PMD_RX_LOG(ERR, "fp[%02d] lost mbuf %lu", fp->index, (unsigned long)TX_BD(pkt_idx, txq)); @@ -2113,147 +2113,128 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link * the mbuf and return to the caller. * * Returns: - * 0 = Success, !0 = Failure + * Number of TX BDs used for the mbuf * Note the side effect that an mbuf may be freed if it causes a problem. */ -int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, int m_pkts) +uint32_t +bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0) { - struct rte_mbuf *m0; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_prod, pkt_prod; - int m_tx; struct bnx2x_softc *sc; uint32_t nbds = 0; - struct bnx2x_fastpath *fp; sc = txq->sc; - fp = &sc->fp[txq->queue_id]; bd_prod = txq->tx_bd_tail; pkt_prod = txq->tx_pkt_tail; - for (m_tx = 0; m_tx < m_pkts; m_tx++) { + txq->sw_ring[TX_BD(pkt_prod, txq)] = m0; - m0 = *m_head++; + tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd; - if (unlikely(txq->nb_tx_avail < 3)) { - PMD_TX_LOG(ERR, "no enough bds %d/%d", - bd_prod, txq->nb_tx_avail); - return -ENOMEM; - } + tx_start_bd->addr = + rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0)); + tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len); + tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; + tx_start_bd->general_data = + (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); - txq->sw_ring[TX_BD(pkt_prod, txq)] = m0; + tx_start_bd->nbd = rte_cpu_to_le_16(2); - tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd; - - tx_start_bd->addr = - rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0)); - tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len); - tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - tx_start_bd->general_data = - (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); - - tx_start_bd->nbd = rte_cpu_to_le_16(2); + if (m0->ol_flags & PKT_TX_VLAN_PKT) { + tx_start_bd->vlan_or_ethertype = + rte_cpu_to_le_16(m0->vlan_tci); + tx_start_bd->bd_flags.as_bitfield |= + (X_ETH_OUTBAND_VLAN << + ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); + } else { + if (IS_PF(sc)) + tx_start_bd->vlan_or_ethertype = + rte_cpu_to_le_16(pkt_prod); + else { + struct ether_hdr *eh = + rte_pktmbuf_mtod(m0, struct ether_hdr *); - if (m0->ol_flags & PKT_TX_VLAN_PKT) { tx_start_bd->vlan_or_ethertype = - rte_cpu_to_le_16(m0->vlan_tci); - tx_start_bd->bd_flags.as_bitfield |= - (X_ETH_OUTBAND_VLAN << - ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); - } else { - if (IS_PF(sc)) - tx_start_bd->vlan_or_ethertype = - rte_cpu_to_le_16(pkt_prod); - else { - struct ether_hdr *eh - = rte_pktmbuf_mtod(m0, struct ether_hdr *); - - tx_start_bd->vlan_or_ethertype - = rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type)); - } + rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type)); } + } - bd_prod = NEXT_TX_BD(bd_prod); - if (IS_VF(sc)) { - struct eth_tx_parse_bd_e2 *tx_parse_bd; - const struct ether_hdr *eh = rte_pktmbuf_mtod(m0, struct ether_hdr *); - uint8_t mac_type = UNICAST_ADDRESS; - - tx_parse_bd = - &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2; - if (is_multicast_ether_addr(&eh->d_addr)) { - if (is_broadcast_ether_addr(&eh->d_addr)) - mac_type = BROADCAST_ADDRESS; - else - mac_type = MULTICAST_ADDRESS; - } - tx_parse_bd->parsing_data = - (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT); - - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi, - &eh->d_addr.addr_bytes[0], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid, - &eh->d_addr.addr_bytes[2], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo, - &eh->d_addr.addr_bytes[4], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi, - &eh->s_addr.addr_bytes[0], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid, - &eh->s_addr.addr_bytes[2], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo, - &eh->s_addr.addr_bytes[4], 2); - - tx_parse_bd->data.mac_addr.dst_hi = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi); - tx_parse_bd->data.mac_addr.dst_mid = - rte_cpu_to_be_16(tx_parse_bd->data. - mac_addr.dst_mid); - tx_parse_bd->data.mac_addr.dst_lo = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo); - tx_parse_bd->data.mac_addr.src_hi = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi); - tx_parse_bd->data.mac_addr.src_mid = - rte_cpu_to_be_16(tx_parse_bd->data. - mac_addr.src_mid); - tx_parse_bd->data.mac_addr.src_lo = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo); - - PMD_TX_LOG(DEBUG, - "PBD dst %x %x %x src %x %x %x p_data %x", - tx_parse_bd->data.mac_addr.dst_hi, - tx_parse_bd->data.mac_addr.dst_mid, - tx_parse_bd->data.mac_addr.dst_lo, - tx_parse_bd->data.mac_addr.src_hi, - tx_parse_bd->data.mac_addr.src_mid, - tx_parse_bd->data.mac_addr.src_lo, - tx_parse_bd->parsing_data); - } + bd_prod = NEXT_TX_BD(bd_prod); + if (IS_VF(sc)) { + struct eth_tx_parse_bd_e2 *tx_parse_bd; + const struct ether_hdr *eh = + rte_pktmbuf_mtod(m0, struct ether_hdr *); + uint8_t mac_type = UNICAST_ADDRESS; + + tx_parse_bd = + &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2; + if (is_multicast_ether_addr(&eh->d_addr)) { + if (is_broadcast_ether_addr(&eh->d_addr)) + mac_type = BROADCAST_ADDRESS; + else + mac_type = MULTICAST_ADDRESS; + } + tx_parse_bd->parsing_data = + (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT); + + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi, + &eh->d_addr.addr_bytes[0], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid, + &eh->d_addr.addr_bytes[2], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo, + &eh->d_addr.addr_bytes[4], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi, + &eh->s_addr.addr_bytes[0], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid, + &eh->s_addr.addr_bytes[2], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo, + &eh->s_addr.addr_bytes[4], 2); + + tx_parse_bd->data.mac_addr.dst_hi = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi); + tx_parse_bd->data.mac_addr.dst_mid = + rte_cpu_to_be_16(tx_parse_bd->data. + mac_addr.dst_mid); + tx_parse_bd->data.mac_addr.dst_lo = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo); + tx_parse_bd->data.mac_addr.src_hi = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi); + tx_parse_bd->data.mac_addr.src_mid = + rte_cpu_to_be_16(tx_parse_bd->data. + mac_addr.src_mid); + tx_parse_bd->data.mac_addr.src_lo = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo); PMD_TX_LOG(DEBUG, - "start bd: nbytes %d flags %x vlan %x\n", - tx_start_bd->nbytes, - tx_start_bd->bd_flags.as_bitfield, - tx_start_bd->vlan_or_ethertype); + "PBD dst %x %x %x src %x %x %x p_data %x", + tx_parse_bd->data.mac_addr.dst_hi, + tx_parse_bd->data.mac_addr.dst_mid, + tx_parse_bd->data.mac_addr.dst_lo, + tx_parse_bd->data.mac_addr.src_hi, + tx_parse_bd->data.mac_addr.src_mid, + tx_parse_bd->data.mac_addr.src_lo, + tx_parse_bd->parsing_data); + } - bd_prod = NEXT_TX_BD(bd_prod); - pkt_prod++; + PMD_TX_LOG(DEBUG, + "start bd: nbytes %d flags %x vlan %x\n", + tx_start_bd->nbytes, + tx_start_bd->bd_flags.as_bitfield, + tx_start_bd->vlan_or_ethertype); - if (TX_IDX(bd_prod) < 2) { - nbds++; - } - } + bd_prod = NEXT_TX_BD(bd_prod); + pkt_prod++; + + if (TX_IDX(bd_prod) < 2) + nbds++; - txq->nb_tx_avail -= m_pkts << 1; + txq->nb_tx_avail -= 1 << 1; txq->tx_bd_tail = bd_prod; txq->tx_pkt_tail = pkt_prod; - mb(); - fp->tx_db.data.prod += (m_pkts << 1) + nbds; - DOORBELL(sc, txq->queue_id, fp->tx_db.raw); - mb(); - - return 0; + return nbds + 2; } static uint16_t bnx2x_cid_ilt_lines(struct bnx2x_softc *sc) diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 135a6eb..b3a3866 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -167,6 +167,8 @@ struct bnx2x_device_type { #define TX_PAGE(x) (((x) & ~USABLE_TX_BD_PER_PAGE) >> 8) #define TX_IDX(x) ((x) & USABLE_TX_BD_PER_PAGE) +#define BDS_PER_TX_PKT (3) + /* * Trigger pending transmits when the number of available BDs is greater * than 1/8 of the total number of usable BDs. @@ -1864,7 +1866,7 @@ int bnx2x_alloc_hsi_mem(struct bnx2x_softc *sc); int bnx2x_alloc_ilt_mem(struct bnx2x_softc *sc); void bnx2x_free_ilt_mem(struct bnx2x_softc *sc); void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count); -int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, int m_pkts); +uint32_t bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0); uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp); void bnx2x_print_adapter_info(struct bnx2x_softc *sc); int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp); diff --git a/drivers/net/bnx2x/bnx2x_rxtx.c b/drivers/net/bnx2x/bnx2x_rxtx.c index 752a5e8..8b047d4 100644 --- a/drivers/net/bnx2x/bnx2x_rxtx.c +++ b/drivers/net/bnx2x/bnx2x_rxtx.c @@ -222,40 +222,37 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) struct bnx2x_tx_queue *txq; struct bnx2x_softc *sc; struct bnx2x_fastpath *fp; - uint32_t burst, nb_tx; - struct rte_mbuf **m = tx_pkts; - int ret; + uint32_t ret; + uint16_t nb_tx_pkts; + uint16_t nb_pkt_sent = 0; txq = p_txq; sc = txq->sc; fp = &sc->fp[txq->queue_id]; - nb_tx = nb_pkts; + if ((unlikely((txq->nb_tx_desc - txq->nb_tx_avail) > + txq->tx_free_thresh))) + bnx2x_txeof(sc, fp); - do { - burst = RTE_MIN(nb_pkts, RTE_PMD_BNX2X_TX_MAX_BURST); + nb_tx_pkts = RTE_MIN(nb_pkts, txq->nb_tx_avail / BDS_PER_TX_PKT); - ret = bnx2x_tx_encap(txq, m, burst); - if (unlikely(ret)) { - PMD_TX_LOG(ERR, "tx_encap failed!"); - } - - bnx2x_update_fp_sb_idx(fp); - - if ((txq->nb_tx_desc - txq->nb_tx_avail) > txq->tx_free_thresh) { - bnx2x_txeof(sc, fp); - } - - if (unlikely(ret == -ENOMEM)) { - break; - } + if (unlikely(nb_tx_pkts == 0)) + return 0; - m += burst; - nb_pkts -= burst; + while (nb_tx_pkts--) { + struct rte_mbuf *m = *tx_pkts++; + assert(m != NULL); + ret = bnx2x_tx_encap(txq, m); + fp->tx_db.data.prod += ret; + nb_pkt_sent++; + } - } while (nb_pkts); + bnx2x_update_fp_sb_idx(fp); + mb(); + DOORBELL(sc, txq->queue_id, fp->tx_db.raw); + mb(); - return nb_tx - nb_pkts; + return nb_pkt_sent; } int -- 1.7.10.3