From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx0b-0016ce01.pphosted.com (mx0a-0016ce01.pphosted.com [67.231.148.157]) by dpdk.org (Postfix) with ESMTP id 6F7D86938 for ; Thu, 12 May 2016 02:06:53 +0200 (CEST) Received: from pps.filterd (m0095336.ppops.net [127.0.0.1]) by mx0a-0016ce01.pphosted.com (8.16.0.17/8.16.0.17) with SMTP id u4C04dEf007656; Wed, 11 May 2016 17:06:52 -0700 Received: from avcashub1.qlogic.com ([198.186.0.117]) by mx0a-0016ce01.pphosted.com with ESMTP id 22tyrfe2y0-1 (version=TLSv1 cipher=AES128-SHA bits=128 verify=NOT); Wed, 11 May 2016 17:06:52 -0700 Received: from avluser05.qlc.com (10.1.113.115) by qlc.com (10.1.4.192) with Microsoft SMTP Server id 14.3.235.1; Wed, 11 May 2016 17:06:51 -0700 Received: (from rmody@localhost) by avluser05.qlc.com (8.14.4/8.14.4/Submit) id u4C06qFn015498; Wed, 11 May 2016 17:06:52 -0700 X-Authentication-Warning: avluser05.qlc.com: rmody set sender to rasesh.mody@qlogic.com using -f From: Rasesh Mody To: CC: , , Rasesh Mody , Harish Patil Date: Wed, 11 May 2016 17:06:24 -0700 Message-ID: <1463011585-15429-4-git-send-email-rasesh.mody@qlogic.com> X-Mailer: git-send-email 1.7.10.3 In-Reply-To: <1463011585-15429-1-git-send-email-rasesh.mody@qlogic.com> References: <1463011585-15429-1-git-send-email-rasesh.mody@qlogic.com> MIME-Version: 1.0 Content-Type: text/plain disclaimer: bypass X-Proofpoint-Virus-Version: vendor=nai engine=5800 definitions=8162 signatures=670716 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 priorityscore=1501 suspectscore=4 phishscore=0 bulkscore=0 spamscore=0 clxscore=1015 impostorscore=0 lowpriorityscore=0 adultscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1604210000 definitions=main-1605110314 Subject: [dpdk-dev] [PATCH v5 4/5] bnx2x: use single doorbell for TX X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 12 May 2016 00:06:53 -0000 Change the Tx routine to ring the doorbell once per burst and not on every Tx packet. This driver-level optimization is necessary to achieve line rates for larger frame sizes (1k or more). Signed-off-by: Rasesh Mody Signed-off-by: Harish Patil --- drivers/net/bnx2x/bnx2x.c | 200 +++++++++++++++++++--------------------- drivers/net/bnx2x/bnx2x.h | 3 +- drivers/net/bnx2x/bnx2x_rxtx.c | 23 +++-- 3 files changed, 107 insertions(+), 119 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c index 5d182c3..f7ee77e 100644 --- a/drivers/net/bnx2x/bnx2x.c +++ b/drivers/net/bnx2x/bnx2x.c @@ -1293,7 +1293,7 @@ bnx2x_free_tx_pkt(__rte_unused struct bnx2x_fastpath *fp, struct bnx2x_tx_queue struct rte_mbuf *tx_mbuf = txq->sw_ring[TX_BD(pkt_idx, txq)]; if (likely(tx_mbuf != NULL)) { - rte_pktmbuf_free(tx_mbuf); + rte_pktmbuf_free_seg(tx_mbuf); } else { PMD_RX_LOG(ERR, "fp[%02d] lost mbuf %lu", fp->index, (unsigned long)TX_BD(pkt_idx, txq)); @@ -2113,141 +2113,127 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link * the mbuf and return to the caller. * * Returns: - * void. + * int: Number of TX BDs used for the mbuf * * Note the side effect that an mbuf may be freed if it causes a problem. */ -void bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, - int m_pkts) +int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0) { - struct rte_mbuf *m0; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_prod, pkt_prod; - int m_tx; struct bnx2x_softc *sc; uint32_t nbds = 0; - struct bnx2x_fastpath *fp; sc = txq->sc; - fp = &sc->fp[txq->queue_id]; - bd_prod = txq->tx_bd_tail; pkt_prod = txq->tx_pkt_tail; - for (m_tx = 0; m_tx < m_pkts; m_tx++) { - - m0 = *m_head++; + txq->sw_ring[TX_BD(pkt_prod, txq)] = m0; - txq->sw_ring[TX_BD(pkt_prod, txq)] = m0; + tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd; - tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd; + tx_start_bd->addr = + rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0)); + tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len); + tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; + tx_start_bd->general_data = + (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); - tx_start_bd->addr = - rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0)); - tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len); - tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - tx_start_bd->general_data = - (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); + tx_start_bd->nbd = rte_cpu_to_le_16(2); - tx_start_bd->nbd = rte_cpu_to_le_16(2); + if (m0->ol_flags & PKT_TX_VLAN_PKT) { + tx_start_bd->vlan_or_ethertype = + rte_cpu_to_le_16(m0->vlan_tci); + tx_start_bd->bd_flags.as_bitfield |= + (X_ETH_OUTBAND_VLAN << + ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); + } else { + if (IS_PF(sc)) + tx_start_bd->vlan_or_ethertype = + rte_cpu_to_le_16(pkt_prod); + else { + struct ether_hdr *eh = + rte_pktmbuf_mtod(m0, struct ether_hdr *); - if (m0->ol_flags & PKT_TX_VLAN_PKT) { tx_start_bd->vlan_or_ethertype = - rte_cpu_to_le_16(m0->vlan_tci); - tx_start_bd->bd_flags.as_bitfield |= - (X_ETH_OUTBAND_VLAN << - ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); - } else { - if (IS_PF(sc)) - tx_start_bd->vlan_or_ethertype = - rte_cpu_to_le_16(pkt_prod); - else { - struct ether_hdr *eh - = rte_pktmbuf_mtod(m0, struct ether_hdr *); - - tx_start_bd->vlan_or_ethertype - = rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type)); - } + rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type)); } + } - bd_prod = NEXT_TX_BD(bd_prod); - if (IS_VF(sc)) { - struct eth_tx_parse_bd_e2 *tx_parse_bd; - const struct ether_hdr *eh = rte_pktmbuf_mtod(m0, struct ether_hdr *); - uint8_t mac_type = UNICAST_ADDRESS; - - tx_parse_bd = - &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2; - if (is_multicast_ether_addr(&eh->d_addr)) { - if (is_broadcast_ether_addr(&eh->d_addr)) - mac_type = BROADCAST_ADDRESS; - else - mac_type = MULTICAST_ADDRESS; - } - tx_parse_bd->parsing_data = - (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT); - - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi, - &eh->d_addr.addr_bytes[0], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid, - &eh->d_addr.addr_bytes[2], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo, - &eh->d_addr.addr_bytes[4], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi, - &eh->s_addr.addr_bytes[0], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid, - &eh->s_addr.addr_bytes[2], 2); - rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo, - &eh->s_addr.addr_bytes[4], 2); - - tx_parse_bd->data.mac_addr.dst_hi = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi); - tx_parse_bd->data.mac_addr.dst_mid = - rte_cpu_to_be_16(tx_parse_bd->data. - mac_addr.dst_mid); - tx_parse_bd->data.mac_addr.dst_lo = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo); - tx_parse_bd->data.mac_addr.src_hi = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi); - tx_parse_bd->data.mac_addr.src_mid = - rte_cpu_to_be_16(tx_parse_bd->data. - mac_addr.src_mid); - tx_parse_bd->data.mac_addr.src_lo = - rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo); - - PMD_TX_LOG(DEBUG, - "PBD dst %x %x %x src %x %x %x p_data %x", - tx_parse_bd->data.mac_addr.dst_hi, - tx_parse_bd->data.mac_addr.dst_mid, - tx_parse_bd->data.mac_addr.dst_lo, - tx_parse_bd->data.mac_addr.src_hi, - tx_parse_bd->data.mac_addr.src_mid, - tx_parse_bd->data.mac_addr.src_lo, - tx_parse_bd->parsing_data); - } + bd_prod = NEXT_TX_BD(bd_prod); + if (IS_VF(sc)) { + struct eth_tx_parse_bd_e2 *tx_parse_bd; + const struct ether_hdr *eh = + rte_pktmbuf_mtod(m0, struct ether_hdr *); + uint8_t mac_type = UNICAST_ADDRESS; + + tx_parse_bd = + &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2; + if (is_multicast_ether_addr(&eh->d_addr)) { + if (is_broadcast_ether_addr(&eh->d_addr)) + mac_type = BROADCAST_ADDRESS; + else + mac_type = MULTICAST_ADDRESS; + } + tx_parse_bd->parsing_data = + (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT); + + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi, + &eh->d_addr.addr_bytes[0], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid, + &eh->d_addr.addr_bytes[2], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo, + &eh->d_addr.addr_bytes[4], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi, + &eh->s_addr.addr_bytes[0], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid, + &eh->s_addr.addr_bytes[2], 2); + rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo, + &eh->s_addr.addr_bytes[4], 2); + + tx_parse_bd->data.mac_addr.dst_hi = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi); + tx_parse_bd->data.mac_addr.dst_mid = + rte_cpu_to_be_16(tx_parse_bd->data. + mac_addr.dst_mid); + tx_parse_bd->data.mac_addr.dst_lo = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo); + tx_parse_bd->data.mac_addr.src_hi = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi); + tx_parse_bd->data.mac_addr.src_mid = + rte_cpu_to_be_16(tx_parse_bd->data. + mac_addr.src_mid); + tx_parse_bd->data.mac_addr.src_lo = + rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo); PMD_TX_LOG(DEBUG, - "start bd: nbytes %d flags %x vlan %x\n", - tx_start_bd->nbytes, - tx_start_bd->bd_flags.as_bitfield, - tx_start_bd->vlan_or_ethertype); + "PBD dst %x %x %x src %x %x %x p_data %x", + tx_parse_bd->data.mac_addr.dst_hi, + tx_parse_bd->data.mac_addr.dst_mid, + tx_parse_bd->data.mac_addr.dst_lo, + tx_parse_bd->data.mac_addr.src_hi, + tx_parse_bd->data.mac_addr.src_mid, + tx_parse_bd->data.mac_addr.src_lo, + tx_parse_bd->parsing_data); + } - bd_prod = NEXT_TX_BD(bd_prod); - pkt_prod++; + PMD_TX_LOG(DEBUG, + "start bd: nbytes %d flags %x vlan %x\n", + tx_start_bd->nbytes, + tx_start_bd->bd_flags.as_bitfield, + tx_start_bd->vlan_or_ethertype); - if (TX_IDX(bd_prod) < 2) { - nbds++; - } - } + bd_prod = NEXT_TX_BD(bd_prod); + pkt_prod++; + + if (TX_IDX(bd_prod) < 2) + nbds++; - txq->nb_tx_avail -= m_pkts << 1; + txq->nb_tx_avail -= 2; txq->tx_bd_tail = bd_prod; txq->tx_pkt_tail = pkt_prod; - mb(); - fp->tx_db.data.prod += (m_pkts << 1) + nbds; - DOORBELL(sc, txq->queue_id, fp->tx_db.raw); - mb(); + return nbds + 2; } static uint16_t bnx2x_cid_ilt_lines(struct bnx2x_softc *sc) diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 42700e7..c24a530 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -1866,8 +1866,7 @@ int bnx2x_alloc_hsi_mem(struct bnx2x_softc *sc); int bnx2x_alloc_ilt_mem(struct bnx2x_softc *sc); void bnx2x_free_ilt_mem(struct bnx2x_softc *sc); void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count); -void bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, - int m_pkts); +int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0); uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp); void bnx2x_print_adapter_info(struct bnx2x_softc *sc); int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp); diff --git a/drivers/net/bnx2x/bnx2x_rxtx.c b/drivers/net/bnx2x/bnx2x_rxtx.c index 3e1f83b..8bcb431 100644 --- a/drivers/net/bnx2x/bnx2x_rxtx.c +++ b/drivers/net/bnx2x/bnx2x_rxtx.c @@ -222,10 +222,9 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) struct bnx2x_tx_queue *txq; struct bnx2x_softc *sc; struct bnx2x_fastpath *fp; - uint32_t burst; - struct rte_mbuf **m = tx_pkts; uint16_t nb_tx_pkts; uint16_t nb_pkt_sent = 0; + uint32_t ret; txq = p_txq; sc = txq->sc; @@ -239,19 +238,23 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) if (unlikely(nb_tx_pkts == 0)) return 0; - burst = RTE_MIN(nb_tx_pkts, RTE_PMD_BNX2X_TX_MAX_BURST); - while (nb_tx_pkts--) { + struct rte_mbuf *m = *tx_pkts++; assert(m != NULL); - bnx2x_tx_encap(txq, m, burst); - bnx2x_update_fp_sb_idx(fp); - if ((txq->nb_tx_desc - txq->nb_tx_avail) > - txq->tx_free_thresh) - bnx2x_txeof(sc, fp); - m += burst; + ret = bnx2x_tx_encap(txq, m); + fp->tx_db.data.prod += ret; nb_pkt_sent++; } + bnx2x_update_fp_sb_idx(fp); + mb(); + DOORBELL(sc, txq->queue_id, fp->tx_db.raw); + mb(); + + if ((txq->nb_tx_desc - txq->nb_tx_avail) > + txq->tx_free_thresh) + bnx2x_txeof(sc, fp); + return nb_pkt_sent; } -- 1.7.10.3