DPDK patches and discussions
* [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode
@ 2019-02-28 19:42 Pavan Nikhilesh Bhagavatula
  2019-03-01  7:38 ` Andrew Rybchenko
                   ` (5 more replies)
  0 siblings, 6 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-02-28 19:42 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, wenzhuo.lu, bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to alloc burst of packets and process them
instead of calling pktalloc for every packet.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 139 +++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 68 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..eef8b3a45 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,61 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }
 
+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	uint32_t nb_segs, pkt_len = 0;
+	struct rte_mbuf *pkt_seg;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = rte_mbuf_raw_alloc(mbp);
+		if (pkt_seg->next == NULL) {
+			pkt->nb_segs = i;
+			rte_pktmbuf_free(pkt);
+			return false;
+		}
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len += pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -155,8 +210,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 	struct rte_port *txp;
-	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +217,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +239,24 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
-			break;
-		}
 
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
-		pkts_burst[nb_pkt] = pkt;
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst, nb_pkt_per_burst))
+		return;
+
+	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+		if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+				&eth_hdr, vlan_tci, vlan_tci_outer, ol_flags)))
+			goto tx_pkts;
 	}
+tx_pkts:
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
-- 
2.21.0


* Re: [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
@ 2019-03-01  7:38 ` Andrew Rybchenko
  2019-03-01  8:45   ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
  2019-03-01 13:47 ` [dpdk-dev] [PATCH v2] app/testpmd: add " Pavan Nikhilesh Bhagavatula
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 68+ messages in thread
From: Andrew Rybchenko @ 2019-03-01  7:38 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
	wenzhuo.lu, bernard.iremonger
  Cc: dev

On 2/28/19 10:42 PM, Pavan Nikhilesh Bhagavatula wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Use mempool bulk get ops to alloc burst of packets and process them
> instead of calling pktalloc for every packet.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>   app/test-pmd/txonly.c | 139 +++++++++++++++++++++---------------------
>   1 file changed, 71 insertions(+), 68 deletions(-)
>
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index 1f08b6ed3..eef8b3a45 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -147,6 +147,61 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
>   	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
>   }
>   
> +static inline bool
> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
> +{
> +	uint32_t nb_segs, pkt_len = 0;
> +	struct rte_mbuf *pkt_seg;
> +	uint8_t i;
> +
> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> +		nb_segs = random() % tx_pkt_nb_segs + 1;
> +	else
> +		nb_segs = tx_pkt_nb_segs;
> +
> +	rte_pktmbuf_reset_headroom(pkt);
> +	pkt->data_len = tx_pkt_seg_lengths[0];
> +	pkt->ol_flags = ol_flags;
> +	pkt->vlan_tci = vlan_tci;
> +	pkt->vlan_tci_outer = vlan_tci_outer;
> +	pkt->l2_len = sizeof(struct ether_hdr);
> +	pkt->l3_len = sizeof(struct ipv4_hdr);
> +
> +	pkt_seg = pkt;
> +	for (i = 1; i < nb_segs; i++) {
> +		pkt_seg->next = rte_mbuf_raw_alloc(mbp);

Why is bulk allocation not used here?

> +		if (pkt_seg->next == NULL) {
> +			pkt->nb_segs = i;
> +			rte_pktmbuf_free(pkt);
> +			return false;
> +		}
> +		pkt_seg = pkt_seg->next;
> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
> +		pkt_len += pkt_seg->data_len;
> +	}
> +	pkt_seg->next = NULL; /* Last segment of packet. */
> +	/*
> +	 * Copy headers in first packet segment(s).
> +	 */
> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> +			sizeof(struct ether_hdr));
> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +			sizeof(struct ether_hdr) +
> +			sizeof(struct ipv4_hdr));
> +
> +	/*
> +	 * Complete first mbuf of packet and append it to the
> +	 * burst of packets to be transmitted.
> +	 */
> +	pkt->nb_segs = nb_segs;
> +	pkt->pkt_len += pkt_len;
> +
> +	return true;
> +}
> +
>   /*
>    * Transmit a burst of multi-segments packets.
>    */
> @@ -155,8 +210,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
>   {
>   	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>   	struct rte_port *txp;
> -	struct rte_mbuf *pkt;
> -	struct rte_mbuf *pkt_seg;
>   	struct rte_mempool *mbp;
>   	struct ether_hdr eth_hdr;
>   	uint16_t nb_tx;
> @@ -164,14 +217,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
>   	uint16_t vlan_tci, vlan_tci_outer;
>   	uint32_t retry;
>   	uint64_t ol_flags = 0;
> -	uint8_t  i;
>   	uint64_t tx_offloads;
>   #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>   	uint64_t start_tsc;
>   	uint64_t end_tsc;
>   	uint64_t core_cycles;
>   #endif
> -	uint32_t nb_segs, pkt_len;
>   
>   #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>   	start_tsc = rte_rdtsc();
> @@ -188,72 +239,24 @@ pkt_burst_transmit(struct fwd_stream *fs)
>   		ol_flags |= PKT_TX_QINQ_PKT;
>   	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
>   		ol_flags |= PKT_TX_MACSEC;
> -	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> -		pkt = rte_mbuf_raw_alloc(mbp);
> -		if (pkt == NULL) {
> -		nomore_mbuf:
> -			if (nb_pkt == 0)
> -				return;
> -			break;
> -		}
>   
> -		/*
> -		 * Using raw alloc is good to improve performance,
> -		 * but some consumers may use the headroom and so
> -		 * decrement data_off. We need to make sure it is
> -		 * reset to default value.
> -		 */
> -		rte_pktmbuf_reset_headroom(pkt);
> -		pkt->data_len = tx_pkt_seg_lengths[0];
> -		pkt_seg = pkt;
> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
> -			nb_segs = random() % tx_pkt_nb_segs + 1;
> -		else
> -			nb_segs = tx_pkt_nb_segs;
> -		pkt_len = pkt->data_len;
> -		for (i = 1; i < nb_segs; i++) {
> -			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
> -			if (pkt_seg->next == NULL) {
> -				pkt->nb_segs = i;
> -				rte_pktmbuf_free(pkt);
> -				goto nomore_mbuf;
> -			}
> -			pkt_seg = pkt_seg->next;
> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
> -			pkt_len += pkt_seg->data_len;
> -		}
> -		pkt_seg->next = NULL; /* Last segment of packet. */
> -
> -		/*
> -		 * Initialize Ethernet header.
> -		 */
> -		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
> -		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> -		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> -
> -		/*
> -		 * Copy headers in first packet segment(s).
> -		 */
> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> -				sizeof(struct ether_hdr));
> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> -				sizeof(struct ether_hdr) +
> -				sizeof(struct ipv4_hdr));
> -
> -		/*
> -		 * Complete first mbuf of packet and append it to the
> -		 * burst of packets to be transmitted.
> -		 */
> -		pkt->nb_segs = nb_segs;
> -		pkt->pkt_len = pkt_len;
> -		pkt->ol_flags = ol_flags;
> -		pkt->vlan_tci = vlan_tci;
> -		pkt->vlan_tci_outer = vlan_tci_outer;
> -		pkt->l2_len = sizeof(struct ether_hdr);
> -		pkt->l3_len = sizeof(struct ipv4_hdr);
> -		pkts_burst[nb_pkt] = pkt;
> +	/*
> +	 * Initialize Ethernet header.
> +	 */
> +	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
> +	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> +	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> +
> +	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst, nb_pkt_per_burst))
> +		return;

Before the patch, the code survived an insufficient-mbufs condition and
sent as many packets as it could allocate. Now it does not. I can't say
for sure whether the new behaviour is acceptable (I'd say no), but even
if it is acceptable it should be highlighted in the changeset description.

Taking segment allocation into account, may I suggest considering a
slightly more sophisticated implementation which allocates packets in
bulk, falls back to individual mbuf allocation when the bulk get fails,
and uses the same mechanism for all segments (i.e. allocate a bulk, use
it, allocate the next, use it, etc.).
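
Roughly something along these lines (an untested sketch only to illustrate
the intent; the helper name is made up, and it assumes the rte_mempool.h /
rte_mbuf.h includes already present in txonly.c):

static inline uint16_t
mbuf_alloc_burst(struct rte_mempool *mp, struct rte_mbuf **mbufs, uint16_t n)
{
	uint16_t i;

	/* Fast path: take the whole burst from the mempool in one call. */
	if (rte_mempool_get_bulk(mp, (void **)mbufs, n) == 0)
		return n;

	/* Fallback: allocate one by one while mbufs are still available. */
	for (i = 0; i < n; i++) {
		mbufs[i] = rte_mbuf_raw_alloc(mp);
		if (mbufs[i] == NULL)
			break;
	}
	return i;
}

The same helper could then serve both the packet burst and the extra
segments of each packet, so an mbuf shortage again results in sending
whatever could be allocated.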

> +
> +	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> +		if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
> +				&eth_hdr, vlan_tci, vlan_tci_outer, ol_flags)))
> +			goto tx_pkts;

If segment allocation fails, who frees the remaining packets from the bulk?

>   	}
> +tx_pkts:
> +
>   	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
>   	/*
>   	 * Retry if necessary


* Re: [dpdk-dev] [EXT] Re: [PATCH] app/testpmd: use mempool bulk get for txonly mode
  2019-03-01  7:38 ` Andrew Rybchenko
@ 2019-03-01  8:45   ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-01  8:45 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, bernard.iremonger, arybchenko, wenzhuo.lu; +Cc: dev

On Fri, 2019-03-01 at 10:38 +0300, Andrew Rybchenko wrote:
> On 2/28/19 10:42 PM, Pavan Nikhilesh Bhagavatula wrote:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > 
> > Use mempool bulk get ops to alloc burst of packets and process them
> > instead of calling pktalloc for every packet.
> > 
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >  app/test-pmd/txonly.c | 139 +++++++++++++++++++++-----------------
> > ----
> >  1 file changed, 71 insertions(+), 68 deletions(-)
> > 
> > diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> > index 1f08b6ed3..eef8b3a45 100644
> > --- a/app/test-pmd/txonly.c
> > +++ b/app/test-pmd/txonly.c
> > @@ -147,6 +147,61 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr
> > *ip_hdr,
> >  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
> >  }
> >  
> > +static inline bool
> > +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> > +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> > +		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
> > +{
> > +	uint32_t nb_segs, pkt_len = 0;
> > +	struct rte_mbuf *pkt_seg;
> > +	uint8_t i;
> > +
> > +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> > +		nb_segs = random() % tx_pkt_nb_segs + 1;
> > +	else
> > +		nb_segs = tx_pkt_nb_segs;
> > +
> > +	rte_pktmbuf_reset_headroom(pkt);
> > +	pkt->data_len = tx_pkt_seg_lengths[0];
> > +	pkt->ol_flags = ol_flags;
> > +	pkt->vlan_tci = vlan_tci;
> > +	pkt->vlan_tci_outer = vlan_tci_outer;
> > +	pkt->l2_len = sizeof(struct ether_hdr);
> > +	pkt->l3_len = sizeof(struct ipv4_hdr);
> > +
> > +	pkt_seg = pkt;
> > +	for (i = 1; i < nb_segs; i++) {
> > +		pkt_seg->next = rte_mbuf_raw_alloc(mbp);
>  
> Why is bulk allocation not used here?

Will update in v2.

> 
> > +		if (pkt_seg->next == NULL) {
> > +			pkt->nb_segs = i;
> > +			rte_pktmbuf_free(pkt);
> > +			return false;
> > +		}
> > +		pkt_seg = pkt_seg->next;
> > +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
> > +		pkt_len += pkt_seg->data_len;
> > +	}
> > +	pkt_seg->next = NULL; /* Last segment of packet. */
> > +	/*
> > +	 * Copy headers in first packet segment(s).
> > +	 */
> > +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> > +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> > +			sizeof(struct ether_hdr));
> > +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> > +			sizeof(struct ether_hdr) +
> > +			sizeof(struct ipv4_hdr));
> > +
> > +	/*
> > +	 * Complete first mbuf of packet and append it to the
> > +	 * burst of packets to be transmitted.
> > +	 */
> > +	pkt->nb_segs = nb_segs;
> > +	pkt->pkt_len += pkt_len;
> > +
> > +	return true;
> > +}
> > +
> >  /*
> >   * Transmit a burst of multi-segments packets.
> >   */
> > @@ -155,8 +210,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
> >  {
> >  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> >  	struct rte_port *txp;
> > -	struct rte_mbuf *pkt;
> > -	struct rte_mbuf *pkt_seg;
> >  	struct rte_mempool *mbp;
> >  	struct ether_hdr eth_hdr;
> >  	uint16_t nb_tx;
> > @@ -164,14 +217,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
> >  	uint16_t vlan_tci, vlan_tci_outer;
> >  	uint32_t retry;
> >  	uint64_t ol_flags = 0;
> > -	uint8_t  i;
> >  	uint64_t tx_offloads;
> >  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> >  	uint64_t start_tsc;
> >  	uint64_t end_tsc;
> >  	uint64_t core_cycles;
> >  #endif
> > -	uint32_t nb_segs, pkt_len;
> >  
> >  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> >  	start_tsc = rte_rdtsc();
> > @@ -188,72 +239,24 @@ pkt_burst_transmit(struct fwd_stream *fs)
> >  		ol_flags |= PKT_TX_QINQ_PKT;
> >  	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
> >  		ol_flags |= PKT_TX_MACSEC;
> > -	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> > -		pkt = rte_mbuf_raw_alloc(mbp);
> > -		if (pkt == NULL) {
> > -		nomore_mbuf:
> > -			if (nb_pkt == 0)
> > -				return;
> > -			break;
> > -		}
> >  
> > -		/*
> > -		 * Using raw alloc is good to improve performance,
> > -		 * but some consumers may use the headroom and so
> > -		 * decrement data_off. We need to make sure it is
> > -		 * reset to default value.
> > -		 */
> > -		rte_pktmbuf_reset_headroom(pkt);
> > -		pkt->data_len = tx_pkt_seg_lengths[0];
> > -		pkt_seg = pkt;
> > -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
> > -			nb_segs = random() % tx_pkt_nb_segs + 1;
> > -		else
> > -			nb_segs = tx_pkt_nb_segs;
> > -		pkt_len = pkt->data_len;
> > -		for (i = 1; i < nb_segs; i++) {
> > -			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
> > -			if (pkt_seg->next == NULL) {
> > -				pkt->nb_segs = i;
> > -				rte_pktmbuf_free(pkt);
> > -				goto nomore_mbuf;
> > -			}
> > -			pkt_seg = pkt_seg->next;
> > -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
> > -			pkt_len += pkt_seg->data_len;
> > -		}
> > -		pkt_seg->next = NULL; /* Last segment of packet. */
> > -
> > -		/*
> > -		 * Initialize Ethernet header.
> > -		 */
> > -		ether_addr_copy(&peer_eth_addrs[fs-
> > >peer_addr],&eth_hdr.d_addr);
> > -		ether_addr_copy(&ports[fs->tx_port].eth_addr,
> > &eth_hdr.s_addr);
> > -		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> > -
> > -		/*
> > -		 * Copy headers in first packet segment(s).
> > -		 */
> > -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> > -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> > -				sizeof(struct ether_hdr));
> > -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> > -				sizeof(struct ether_hdr) +
> > -				sizeof(struct ipv4_hdr));
> > -
> > -		/*
> > -		 * Complete first mbuf of packet and append it to the
> > -		 * burst of packets to be transmitted.
> > -		 */
> > -		pkt->nb_segs = nb_segs;
> > -		pkt->pkt_len = pkt_len;
> > -		pkt->ol_flags = ol_flags;
> > -		pkt->vlan_tci = vlan_tci;
> > -		pkt->vlan_tci_outer = vlan_tci_outer;
> > -		pkt->l2_len = sizeof(struct ether_hdr);
> > -		pkt->l3_len = sizeof(struct ipv4_hdr);
> > -		pkts_burst[nb_pkt] = pkt;
> > +	/*
> > +	 * Initialize Ethernet header.
> > +	 */
> > +	ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
> > &eth_hdr.d_addr);
> > +	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> > +	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> > +
> > +	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
> > nb_pkt_per_burst))
> > +		return;
>  
> Before the patch the code survived insufficient of mbufs condition
> and
> sent as much as it can allocate. Now it is not. I can't say for sure
> if the
> new behaviour is acceptable or not (I'd say no), but even if it is
> acceptable
> it should be highlighted in the changeset description.
> 

Ack.

> Taking segments allocation into account may I suggest to consider
> a bit sophisticated implementation which allocates packets in bulks
> with fallback to individual mbufs allocation and usage of the
> mechanism
> for all segments (i.e. allocate bulk, use it, allocate next, use it,
> etc).

I think segment allocation can always use a bulk get, since a failure
should always free the entire packet chain.
I will send v2 with a fallback to individual mbuf allocation.
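
Roughly what I have in mind for v2 (an untested sketch inside a
pkt_burst_prepare()-style helper; RTE_MAX_SEGS_PER_PKT, mbp, pkt and
nb_segs are the existing txonly.c names):

	struct rte_mbuf *segs[RTE_MAX_SEGS_PER_PKT];

	/* Grab all extra segments in a single bulk get. */
	if (nb_segs > 1 &&
	    rte_mempool_get_bulk(mbp, (void **)segs, nb_segs - 1) != 0) {
		/* Nothing is linked yet, so the head mbuf goes back as is. */
		rte_mempool_put(mbp, pkt);
		return false;
	}

Once the segments are linked into the chain, any later failure can drop
the whole packet with a single rte_pktmbuf_free(pkt).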

> 
> > +
> > +	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> > +		if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt],
> > mbp,
> > +				&eth_hdr, vlan_tci, vlan_tci_outer,
> > ol_flags)))
> > +			goto tx_pkts;
>  
> If segment allocation fails, who frees remaining packets from the
> bulk?

My bad. Thanks for the heads up.

> 
> >  	}
> > +tx_pkts:
> > +
> >  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> > nb_pkt);
> >  	/*
> >  	 * Retry if necessary
>  


* [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
  2019-03-01  7:38 ` Andrew Rybchenko
@ 2019-03-01 13:47 ` Pavan Nikhilesh Bhagavatula
  2019-03-19 16:48   ` Ferruh Yigit
  2019-03-26 11:00   ` Thomas Monjalon
  2019-03-26 12:26 ` [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-01 13:47 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, arybchenko, ferruh.yigit, bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to alloc burst of packets and process them.
If bulk get fails fallback to rte_mbuf_raw_alloc.

Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 159 ++++++++++++++++++++++++------------------
 1 file changed, 93 insertions(+), 66 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..af0be89ca 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,62 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len = 0;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len += pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -154,9 +210,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
+	struct rte_port *txp;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +219,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +241,46 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
-			break;
-		}

-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
+				nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put_bulk(mbp,
+						(void **)&pkts_burst[nb_pkt],
+						nb_pkt_per_burst - nb_pkt);
+				break;
+			}
+		}
+	} else {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (pkt == NULL)
+				break;
+			if (unlikely(!pkt_burst_prepare(pkt, mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put(mbp, pkt);
+				break;
 			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
+			pkts_burst[nb_pkt] = pkt;
 		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
-		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.21.0


* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-01 13:47 ` [dpdk-dev] [PATCH v2] app/testpmd: add " Pavan Nikhilesh Bhagavatula
@ 2019-03-19 16:48   ` Ferruh Yigit
  2019-03-19 16:48     ` Ferruh Yigit
  2019-03-20  4:53     ` Pavan Nikhilesh Bhagavatula
  2019-03-26 11:00   ` Thomas Monjalon
  1 sibling, 2 replies; 68+ messages in thread
From: Ferruh Yigit @ 2019-03-19 16:48 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
	arybchenko, bernard.iremonger, Wenzhuo Lu, Jingjing Wu
  Cc: dev

On 3/1/2019 1:47 PM, Pavan Nikhilesh Bhagavatula wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Use mempool bulk get ops to alloc burst of packets and process them.
> If bulk get fails fallback to rte_mbuf_raw_alloc.

I assume the motivation is performance improvement; do you have numbers
from before and after this patch?

cc'ed maintainers.

> 
> Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

<...>



* [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-19 16:48   ` Ferruh Yigit
  2019-03-19 16:48     ` Ferruh Yigit
@ 2019-03-20  4:53     ` Pavan Nikhilesh Bhagavatula
  2019-03-20  4:53       ` Pavan Nikhilesh Bhagavatula
  2019-03-26  8:43       ` Ferruh Yigit
  1 sibling, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-20  4:53 UTC (permalink / raw)
  To: jingjing.wu, bernard.iremonger, arybchenko, wenzhuo.lu,
	Jerin Jacob Kollanukkaran, ferruh.yigit
  Cc: dev

On Tue, 2019-03-19 at 16:48 +0000, Ferruh Yigit wrote:
> On 3/1/2019 1:47 PM, Pavan Nikhilesh Bhagavatula wrote:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > 
> > Use mempool bulk get ops to alloc burst of packets and process
> > them.
> > If bulk get fails fallback to rte_mbuf_raw_alloc.
> 
> I assume the motivation is performance improvement, do you have the
> numbers
> before and after this patch?

We are seeing approximately 500% improvement with the patch on
octeontx2.

> 
> cc'ed maintainers.
> 
> > Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> <...>
> 





* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-20  4:53     ` Pavan Nikhilesh Bhagavatula
  2019-03-20  4:53       ` Pavan Nikhilesh Bhagavatula
@ 2019-03-26  8:43       ` Ferruh Yigit
  2019-03-26  8:43         ` Ferruh Yigit
  1 sibling, 1 reply; 68+ messages in thread
From: Ferruh Yigit @ 2019-03-26  8:43 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, jingjing.wu, bernard.iremonger,
	arybchenko, wenzhuo.lu, Jerin Jacob Kollanukkaran
  Cc: dev, Han, YingyaX

On 3/20/2019 4:53 AM, Pavan Nikhilesh Bhagavatula wrote:
> On Tue, 2019-03-19 at 16:48 +0000, Ferruh Yigit wrote:
>> On 3/1/2019 1:47 PM, Pavan Nikhilesh Bhagavatula wrote:
>>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>>
>>> Use mempool bulk get ops to alloc burst of packets and process
>>> them.
>>> If bulk get fails fallback to rte_mbuf_raw_alloc.
>>
>> I assume the motivation is performance improvement, do you have the
>> numbers
>> before and after this patch?
> 
> We are seeing approximately 500% improvement with the patch on
> octeontx2.

500% is interesting.

btw, Intel also confirmed the improvement:
Tested-by: Yingya Han <yingyax.han@intel.com>

> 
>>
>> cc'ed maintainers.
>>
>>> Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
>>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> <...>
>>
> 
> 



* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-01 13:47 ` [dpdk-dev] [PATCH v2] app/testpmd: add " Pavan Nikhilesh Bhagavatula
  2019-03-19 16:48   ` Ferruh Yigit
@ 2019-03-26 11:00   ` Thomas Monjalon
  2019-03-26 11:00     ` Thomas Monjalon
  2019-03-26 11:50     ` Iremonger, Bernard
  1 sibling, 2 replies; 68+ messages in thread
From: Thomas Monjalon @ 2019-03-26 11:00 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula
  Cc: dev, Jerin Jacob Kollanukkaran, arybchenko, ferruh.yigit,
	bernard.iremonger

01/03/2019 14:47, Pavan Nikhilesh Bhagavatula:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Use mempool bulk get ops to alloc burst of packets and process them.
> If bulk get fails fallback to rte_mbuf_raw_alloc.
> 
> Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> 
>  v2 Changes:
>  - Use bulk ops for fetching segments. (Andrew Rybchenko)
>  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
>  - Fix mbufs not being freed when there is no more mbufs available for
>  segments. (Andrew Rybchenko)
> 
>  app/test-pmd/txonly.c | 159 ++++++++++++++++++++++++------------------
>  1 file changed, 93 insertions(+), 66 deletions(-)

This changes a lot of lines, so it is difficult to know exactly
what has changed. Please split it into a refactoring patch with no
functional change, and introduce the real change afterwards.
Then we'll be able to examine it and check the performance.

We need more tests on more hardware in order to better
understand the performance improvement.
For info, a degradation is seen in the Mellanox lab.



* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-26 11:00   ` Thomas Monjalon
  2019-03-26 11:00     ` Thomas Monjalon
@ 2019-03-26 11:50     ` Iremonger, Bernard
  2019-03-26 11:50       ` Iremonger, Bernard
  2019-03-26 12:06       ` Pavan Nikhilesh Bhagavatula
  1 sibling, 2 replies; 68+ messages in thread
From: Iremonger, Bernard @ 2019-03-26 11:50 UTC (permalink / raw)
  To: Thomas Monjalon, Pavan Nikhilesh Bhagavatula
  Cc: dev, Jerin Jacob Kollanukkaran, arybchenko, Yigit, Ferruh

Hi Pavan,

<snip>

> Subject: Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for
> txonly mode
> 
> 01/03/2019 14:47, Pavan Nikhilesh Bhagavatula:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Use mempool bulk get ops to alloc burst of packets and process them.
> > If bulk get fails fallback to rte_mbuf_raw_alloc.
> >
> > Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >
> >  v2 Changes:
> >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > Rybchenko)
> >  - Fix mbufs not being freed when there is no more mbufs available for
> > segments. (Andrew Rybchenko)
> >
> >  app/test-pmd/txonly.c | 159
> > ++++++++++++++++++++++++------------------
> >  1 file changed, 93 insertions(+), 66 deletions(-)
> 
> This is changing a lot of lines so it is difficult to know what is changed exactly.
> Please split it with a refactoring without any real change, and introduce the
> real change later.
> Then we'll be able to examine it and check the performance.
> 
> We need to have more tests with more hardware in order to better
> understand the performance improvement.
> For info, a degradation is seen in Mellanox lab.
> 
> 
+1
Not easy to review.
BTW, there is an unnecessary change at lines 157 and 158 in txonly.c.

Regards,

Bernard



* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-26 11:50     ` Iremonger, Bernard
  2019-03-26 11:50       ` Iremonger, Bernard
@ 2019-03-26 12:06       ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:06         ` Pavan Nikhilesh Bhagavatula
  2019-03-26 13:16         ` Jerin Jacob Kollanukkaran
  1 sibling, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 12:06 UTC (permalink / raw)
  To: Iremonger, Bernard, Thomas Monjalon
  Cc: dev, Jerin Jacob Kollanukkaran, arybchenko, Yigit, Ferruh



> -----Original Message-----
> From: Iremonger, Bernard <bernard.iremonger@intel.com>
> Sent: Tuesday, March 26, 2019 5:20 PM
> To: Thomas Monjalon <thomas@monjalon.net>; Pavan Nikhilesh
> Bhagavatula <pbhagavatula@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> arybchenko@solarflare.com; Yigit, Ferruh <ferruh.yigit@intel.com>
> Subject: [EXT] RE: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk
> get for txonly mode
> 
> External Email
> 
> ----------------------------------------------------------------------
> Hi Pavan,
> 
> <snip>
> 
> > Subject: Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get
> > for txonly mode
> >
> > 01/03/2019 14:47, Pavan Nikhilesh Bhagavatula:
> > > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > >
> > > Use mempool bulk get ops to alloc burst of packets and process them.
> > > If bulk get fails fallback to rte_mbuf_raw_alloc.
> > >
> > > Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > ---
> > >
> > >  v2 Changes:
> > >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> > >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > > Rybchenko)
> > >  - Fix mbufs not being freed when there is no more mbufs available
> > > for segments. (Andrew Rybchenko)
> > >
> > >  app/test-pmd/txonly.c | 159
> > > ++++++++++++++++++++++++------------------
> > >  1 file changed, 93 insertions(+), 66 deletions(-)
> >
> > This is changing a lot of lines so it is difficult to know what is changed
> exactly.
> > Please split it with a refactoring without any real change, and
> > introduce the real change later.

OK, I will split it in the next version.

> > Then we'll be able to examine it and check the performance.
> >
> > We need to have more tests with more hardware in order to better
> > understand the performance improvement.
> > For info, a degradation is seen in Mellanox lab.

The only real change was that we introduced a mempool bulk get to avoid multiple calls into the mempool layer.
I don't see how that would degrade the performance.
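
Just to restate the difference (error handling omitted, not a claim about
any particular driver):

	/* Before: one mempool access per packet. */
	for (i = 0; i < nb_pkt_per_burst; i++)
		pkts_burst[i] = rte_mbuf_raw_alloc(mbp);

	/* After: a single mempool access for the whole burst. */
	rte_mempool_get_bulk(mbp, (void **)pkts_burst, nb_pkt_per_burst);

How much that helps clearly depends on the platform and the mempool driver.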

> >
> >
> +1
> Not easy to review.
> Btw, unnecessary change at lines  157 and 158 in txonly.c

Will remove the assignments.

> 
> Regards,
> 
> Bernard



* [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd txonly mode
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
  2019-03-01  7:38 ` Andrew Rybchenko
  2019-03-01 13:47 ` [dpdk-dev] [PATCH v2] app/testpmd: add " Pavan Nikhilesh Bhagavatula
@ 2019-03-26 12:26 ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:26   ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:27   ` [dpdk-dev] [PATCH v3 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03 ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 12:26 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Optimize testpmd txonly mode by
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops for allocating segments instead of having an inner
loop for every segment.

Also, move the packet prepare logic into a separate function so that it
can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 140 +++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 64 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..5610e0e42 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,62 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len += pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -154,9 +210,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
+	struct rte_port *txp;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +219,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +241,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.20.1


* [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd txonly mode
  2019-03-26 12:26 ` [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
@ 2019-03-26 12:26   ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:27   ` [dpdk-dev] [PATCH v3 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
  1 sibling, 0 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 12:26 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Optimize testpmd txonly mode by:
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops for allocating segments instead of having an inner loop
for every segment.

Also, move the packet prepare logic into a separate function so that it
can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fall back to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 140 +++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 64 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..5610e0e42 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,62 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len += pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -154,9 +210,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
+	struct rte_port *txp;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +219,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +241,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.20.1


^ permalink raw reply	[flat|nested] 68+ messages in thread
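
The core of patch 1/2 above is replacing the per-segment rte_mbuf_raw_alloc()
loop with a single rte_mempool_get_bulk() call for the trailing segments. A
minimal, self-contained sketch of that pattern follows; the helper name, the
MAX_SEGS bound and the length-array handling are illustrative assumptions, not
the exact testpmd code.

#include <rte_mbuf.h>
#include <rte_mempool.h>

#define MAX_SEGS 8	/* illustrative bound, stands in for RTE_MAX_SEGS_PER_PKT */

/* Chain nb_segs - 1 extra segments onto "first" with one mempool operation
 * instead of nb_segs - 1 individual raw allocs. "mp" must be an mbuf pool,
 * as in testpmd, so raw objects come back usable as mbufs.
 */
static struct rte_mbuf *
chain_segments(struct rte_mempool *mp, struct rte_mbuf *first,
	       const uint16_t *seg_lens, unsigned int nb_segs)
{
	struct rte_mbuf *segs[MAX_SEGS];
	struct rte_mbuf *cur = first;
	unsigned int i;

	if (nb_segs > MAX_SEGS)
		return NULL;

	/* One bulk get for all extra segments; on failure the caller still
	 * owns and must free "first".
	 */
	if (nb_segs > 1 &&
	    rte_mempool_get_bulk(mp, (void **)segs, nb_segs - 1) != 0)
		return NULL;

	first->data_len = seg_lens[0];
	first->pkt_len = seg_lens[0];
	for (i = 1; i < nb_segs; i++) {
		cur->next = segs[i - 1];
		cur = cur->next;
		cur->data_len = seg_lens[i];
		first->pkt_len += seg_lens[i];
	}
	cur->next = NULL;	/* last segment of the packet */
	first->nb_segs = nb_segs;

	return first;
}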

* [dpdk-dev] [PATCH v3 2/2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-26 12:26 ` [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
  2019-03-26 12:26   ` Pavan Nikhilesh Bhagavatula
@ 2019-03-26 12:27   ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:27     ` Pavan Nikhilesh Bhagavatula
  1 sibling, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 12:27 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula, Yingya Han

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to allocate a burst of packets and process them.
If bulk get fails, fall back to rte_mbuf_raw_alloc.

Tested-by: Yingya Han <yingyax.han@intel.com>
Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 5610e0e42..3ca13e622 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -249,18 +249,33 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
 	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
 
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL)
-			break;
-		if (unlikely(!pkt_burst_prepare(pkt, mbp,
-						&eth_hdr, vlan_tci,
-						vlan_tci_outer,
-						ol_flags))) {
-			rte_mempool_put(mbp, pkt);
-			break;
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
+				nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put_bulk(mbp,
+						(void **)&pkts_burst[nb_pkt],
+						nb_pkt_per_burst - nb_pkt);
+				break;
+			}
+		}
+	} else {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (pkt == NULL)
+				break;
+			if (unlikely(!pkt_burst_prepare(pkt, mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put(mbp, pkt);
+				break;
+			}
+			pkts_burst[nb_pkt] = pkt;
 		}
-		pkts_burst[nb_pkt] = pkt;
 	}
 
 	if (nb_pkt == 0)
-- 
2.20.1

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd txonly mode
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
                   ` (2 preceding siblings ...)
  2019-03-26 12:26 ` [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
@ 2019-03-26 13:03 ` Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03   ` Pavan Nikhilesh Bhagavatula
                     ` (2 more replies)
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
  5 siblings, 3 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 13:03 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Optimize testpmd txonly mode by:
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops for allocating segments instead of having an inner loop
for every segment.

Also, move the packet prepare logic into a separate function so that it
can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v4 Changes:
 - Fix packet len calculation.

 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fall back to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 141 +++++++++++++++++++++++-------------------
 1 file changed, 77 insertions(+), 64 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..8d49e41b1 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_len = pkt->data_len;
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len = pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -154,9 +211,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
+	struct rte_port *txp;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.20.1

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-26 13:03 ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03   ` Pavan Nikhilesh Bhagavatula
@ 2019-03-26 13:03   ` Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03     ` Pavan Nikhilesh Bhagavatula
  2019-03-26 16:13   ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Iremonger, Bernard
  2 siblings, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-26 13:03 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula, Yingya Han

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to allocate a burst of packets and process them.
If bulk get fails, fall back to rte_mbuf_raw_alloc.

Tested-by: Yingya Han <yingyax.han@intel.com>
Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 8d49e41b1..0330f4c5a 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -250,18 +250,33 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
 	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
 
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL)
-			break;
-		if (unlikely(!pkt_burst_prepare(pkt, mbp,
-						&eth_hdr, vlan_tci,
-						vlan_tci_outer,
-						ol_flags))) {
-			rte_mempool_put(mbp, pkt);
-			break;
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
+				nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put_bulk(mbp,
+						(void **)&pkts_burst[nb_pkt],
+						nb_pkt_per_burst - nb_pkt);
+				break;
+			}
+		}
+	} else {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (pkt == NULL)
+				break;
+			if (unlikely(!pkt_burst_prepare(pkt, mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put(mbp, pkt);
+				break;
+			}
+			pkts_burst[nb_pkt] = pkt;
 		}
-		pkts_burst[nb_pkt] = pkt;
 	}
 
 	if (nb_pkt == 0)
-- 
2.20.1

^ permalink raw reply	[flat|nested] 68+ messages in thread
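
Stripped of the testpmd specifics, the control flow added by patch 2/2 above
is: try one rte_mempool_get_bulk() for the whole burst, fall back to
per-packet rte_mbuf_raw_alloc() when the pool cannot serve the burst in one
call, and on a mid-burst prepare failure hand the untouched tail back with
rte_mempool_put_bulk(). A condensed sketch, where BURST and prepare_pkt() are
placeholders rather than the real testpmd symbols:

#include <stdbool.h>
#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>

#define BURST 32	/* placeholder burst size */

/* Fill pkts[] with prepared mbufs; returns how many are ready to transmit. */
static uint16_t
alloc_and_prepare_burst(struct rte_mempool *mp, struct rte_mbuf **pkts,
			bool (*prepare_pkt)(struct rte_mbuf *))
{
	uint16_t n = 0;

	if (rte_mempool_get_bulk(mp, (void **)pkts, BURST) == 0) {
		/* Fast path: whole burst fetched with a single mempool op. */
		for (n = 0; n < BURST; n++) {
			if (!prepare_pkt(pkts[n])) {
				/* Return the unprepared tail in one call. */
				rte_mempool_put_bulk(mp, (void **)&pkts[n],
						     BURST - n);
				break;
			}
		}
	} else {
		/* Fallback: allocate (and on failure release) per packet. */
		for (n = 0; n < BURST; n++) {
			struct rte_mbuf *pkt = rte_mbuf_raw_alloc(mp);

			if (pkt == NULL)
				break;
			if (!prepare_pkt(pkt)) {
				rte_mempool_put(mp, pkt);
				break;
			}
			pkts[n] = pkt;
		}
	}

	return n;
}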

* Re: [dpdk-dev] [PATCH v2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-26 12:06       ` Pavan Nikhilesh Bhagavatula
  2019-03-26 12:06         ` Pavan Nikhilesh Bhagavatula
@ 2019-03-26 13:16         ` Jerin Jacob Kollanukkaran
  2019-03-26 13:16           ` Jerin Jacob Kollanukkaran
  1 sibling, 1 reply; 68+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-03-26 13:16 UTC (permalink / raw)
  To: thomas, bernard.iremonger, Pavan Nikhilesh Bhagavatula
  Cc: shahafs, matan, dev, arybchenko, ferruh.yigit, yskoh

On Tue, 2019-03-26 at 12:06 +0000, Pavan Nikhilesh Bhagavatula wrote:
> > -----Original Message-----
> > From: Iremonger, Bernard <bernard.iremonger@intel.com>
> > Sent: Tuesday, March 26, 2019 5:20 PM
> > To: Thomas Monjalon <thomas@monjalon.net>; Pavan Nikhilesh
> > Bhagavatula <pbhagavatula@marvell.com>
> > Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> > arybchenko@solarflare.com; Yigit, Ferruh <ferruh.yigit@intel.com>
> > Subject: [EXT] RE: [dpdk-dev] [PATCH v2] app/testpmd: add mempool
> > bulk
> > get for txonly mode
> > 
> > 
> > > Then we'll be able to examine it and check the performance.
> > > 
> > > We need to have more tests with more hardware in order to better
> > > understand the performance improvement.
> > > For info, a degradation is seen in Mellanox lab.
> 
> The only real change was that we introduced mempool_bulk get to avoid
> multiple calls to mempool layer. 
> I don't see how that would degrade the performance.

I don't think it is CPU-instruction-based degradation, as it gives
better performance on x86 and arm64. Adding the Mellanox maintainers to
comment on how it impacts Mellanox NIC hardware.




^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd txonly mode
  2019-03-26 13:03 ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03   ` Pavan Nikhilesh Bhagavatula
  2019-03-26 13:03   ` [dpdk-dev] [PATCH v4 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
@ 2019-03-26 16:13   ` Iremonger, Bernard
  2019-03-26 16:13     ` Iremonger, Bernard
  2 siblings, 1 reply; 68+ messages in thread
From: Iremonger, Bernard @ 2019-03-26 16:13 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	arybchenko, Yigit, Ferruh
  Cc: dev

Hi Pavan,

> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula [mailto:pbhagavatula@marvell.com]
> Sent: Tuesday, March 26, 2019 1:03 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
> arybchenko@solarflare.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Iremonger, Bernard <bernard.iremonger@intel.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd txonly
> mode
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Optimize testpmd txonly mode by
> 1. Moving per packet ethernet header copy above the loop.
> 2. Use bulk ops for allocating segments instead of having a inner loop for
> every segment.
> 
> Also, move the packet prepare logic into a separate function so that it can be
> reused later.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v4 Changes:
>  - Fix packet len calculation.
> 
>  v3 Changes:
>  - Split the patches for easier review. (Thomas)
>  - Remove unnecessary assignments to 0. (Bernard)
> 
>  v2 Changes:
>  - Use bulk ops for fetching segments. (Andrew Rybchenko)
>  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
>  - Fix mbufs not being freed when there is no more mbufs available for
> segments. (Andrew Rybchenko)
> 
>  app/test-pmd/txonly.c | 141 +++++++++++++++++++++++-------------------
>  1 file changed, 77 insertions(+), 64 deletions(-)
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> 1f08b6ed3..8d49e41b1 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr
> *ip_hdr,
>  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;  }
> 
> +static inline bool
> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
> +	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
> +	struct rte_mbuf *pkt_seg;
> +	uint32_t nb_segs, pkt_len;
> +	uint8_t i;
> +
> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> +		nb_segs = random() % tx_pkt_nb_segs + 1;
> +	else
> +		nb_segs = tx_pkt_nb_segs;
> +
> +	if (nb_segs > 1) {
> +		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
> nb_segs))
> +			return false;
> +	}
> +
> +	rte_pktmbuf_reset_headroom(pkt);
> +	pkt->data_len = tx_pkt_seg_lengths[0];
> +	pkt->ol_flags = ol_flags;
> +	pkt->vlan_tci = vlan_tci;
> +	pkt->vlan_tci_outer = vlan_tci_outer;
> +	pkt->l2_len = sizeof(struct ether_hdr);
> +	pkt->l3_len = sizeof(struct ipv4_hdr);
> +
> +	pkt_len = pkt->data_len;
> +	pkt_seg = pkt;
> +	for (i = 1; i < nb_segs; i++) {
> +		pkt_seg->next = pkt_segs[i - 1];
> +		pkt_seg = pkt_seg->next;
> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
> +		pkt_len += pkt_seg->data_len;
> +	}
> +	pkt_seg->next = NULL; /* Last segment of packet. */
> +	/*
> +	 * Copy headers in first packet segment(s).
> +	 */
> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> +			sizeof(struct ether_hdr));
> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +			sizeof(struct ether_hdr) +
> +			sizeof(struct ipv4_hdr));
> +
> +	/*
> +	 * Complete first mbuf of packet and append it to the
> +	 * burst of packets to be transmitted.
> +	 */
> +	pkt->nb_segs = nb_segs;
> +	pkt->pkt_len = pkt_len;
> +
> +	return true;
> +}
> +
>  /*
>   * Transmit a burst of multi-segments packets.
>   */
> @@ -154,9 +211,8 @@ static void
>  pkt_burst_transmit(struct fwd_stream *fs)  {
>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> -	struct rte_port *txp;
>  	struct rte_mbuf *pkt;
> -	struct rte_mbuf *pkt_seg;
> +	struct rte_port *txp;

Unnecessary change to struct rte_port *txp is still there.

>  	struct rte_mempool *mbp;
>  	struct ether_hdr eth_hdr;
>  	uint16_t nb_tx;
> @@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
>  	uint16_t vlan_tci, vlan_tci_outer;
>  	uint32_t retry;
>  	uint64_t ol_flags = 0;
> -	uint8_t  i;
>  	uint64_t tx_offloads;
>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>  	uint64_t start_tsc;
>  	uint64_t end_tsc;
>  	uint64_t core_cycles;
>  #endif
> -	uint32_t nb_segs, pkt_len;
> 
>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>  	start_tsc = rte_rdtsc();
> @@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
>  		ol_flags |= PKT_TX_QINQ_PKT;
>  	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
>  		ol_flags |= PKT_TX_MACSEC;
> +
> +	/*
> +	 * Initialize Ethernet header.
> +	 */
> +	ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
> &eth_hdr.d_addr);
> +	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> +	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> +
>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
>  		pkt = rte_mbuf_raw_alloc(mbp);
> -		if (pkt == NULL) {
> -		nomore_mbuf:
> -			if (nb_pkt == 0)
> -				return;
> +		if (pkt == NULL)
> +			break;
> +		if (unlikely(!pkt_burst_prepare(pkt, mbp,
> +						&eth_hdr, vlan_tci,
> +						vlan_tci_outer,
> +						ol_flags))) {
> +			rte_mempool_put(mbp, pkt);
>  			break;
>  		}
> -
> -		/*
> -		 * Using raw alloc is good to improve performance,
> -		 * but some consumers may use the headroom and so
> -		 * decrement data_off. We need to make sure it is
> -		 * reset to default value.
> -		 */
> -		rte_pktmbuf_reset_headroom(pkt);
> -		pkt->data_len = tx_pkt_seg_lengths[0];
> -		pkt_seg = pkt;
> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
> -			nb_segs = random() % tx_pkt_nb_segs + 1;
> -		else
> -			nb_segs = tx_pkt_nb_segs;
> -		pkt_len = pkt->data_len;
> -		for (i = 1; i < nb_segs; i++) {
> -			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
> -			if (pkt_seg->next == NULL) {
> -				pkt->nb_segs = i;
> -				rte_pktmbuf_free(pkt);
> -				goto nomore_mbuf;
> -			}
> -			pkt_seg = pkt_seg->next;
> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
> -			pkt_len += pkt_seg->data_len;
> -		}
> -		pkt_seg->next = NULL; /* Last segment of packet. */
> -
> -		/*
> -		 * Initialize Ethernet header.
> -		 */
> -		ether_addr_copy(&peer_eth_addrs[fs-
> >peer_addr],&eth_hdr.d_addr);
> -		ether_addr_copy(&ports[fs->tx_port].eth_addr,
> &eth_hdr.s_addr);
> -		eth_hdr.ether_type =
> rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> -
> -		/*
> -		 * Copy headers in first packet segment(s).
> -		 */
> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> -				sizeof(struct ether_hdr));
> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> -				sizeof(struct ether_hdr) +
> -				sizeof(struct ipv4_hdr));
> -
> -		/*
> -		 * Complete first mbuf of packet and append it to the
> -		 * burst of packets to be transmitted.
> -		 */
> -		pkt->nb_segs = nb_segs;
> -		pkt->pkt_len = pkt_len;
> -		pkt->ol_flags = ol_flags;
> -		pkt->vlan_tci = vlan_tci;
> -		pkt->vlan_tci_outer = vlan_tci_outer;
> -		pkt->l2_len = sizeof(struct ether_hdr);
> -		pkt->l3_len = sizeof(struct ipv4_hdr);
>  		pkts_burst[nb_pkt] = pkt;
>  	}
> +
> +	if (nb_pkt == 0)
> +		return;
> +
>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> nb_pkt);
>  	/*
>  	 * Retry if necessary
> --
> 2.20.1

Regards,

Bernard.

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
                   ` (3 preceding siblings ...)
  2019-03-26 13:03 ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
@ 2019-03-31 13:14 ` Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
                     ` (3 more replies)
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
  5 siblings, 4 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-31 13:14 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Optimize testpmd txonly mode by:
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops for allocating segments instead of having an inner loop
for every segment.

Also, move the packet prepare logic into a separate function so that it
can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v5 Changes
 - Remove unnecessary change to struct rte_port *txp (movement). (Bernard)

 v4 Changes:
 - Fix packet len calculation.

 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fall back to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 139 +++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 63 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..9c0147089 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_len = pkt->data_len;
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len = pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -156,7 +213,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
@ 2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14   ` [dpdk-dev] [PATCH v5 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-31 13:14 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Optimize testpmd txonly mode by:
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops for allocating segments instead of an inner loop
for every segment.

Also, move the packet prepare logic into a separate function so that it
can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v5 Changes
 - Remove unnecessary change to struct rte_port *txp (movement). (Bernard)

 v4 Changes:
 - Fix packet len calculation.

 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there is no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 139 +++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 63 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..9c0147089 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }

+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_len = pkt->data_len;
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len = pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -156,7 +213,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;

 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.21.0


^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v5 2/2] app/testpmd: add mempool bulk get for txonly mode
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
@ 2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14     ` Pavan Nikhilesh Bhagavatula
  2019-04-01 20:25   ` [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd " Ferruh Yigit
  2019-04-02  9:03   ` Ali Alnubani
  3 siblings, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-03-31 13:14 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger
  Cc: dev, Pavan Nikhilesh Bhagavatula, Yingya Han

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to allocate a burst of packets and process them.
If the bulk get fails, fall back to rte_mbuf_raw_alloc.

Tested-by: Yingya Han <yingyax.han@intel.com>
Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 9c0147089..4c34ef128 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -250,18 +250,33 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
 	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
 
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL)
-			break;
-		if (unlikely(!pkt_burst_prepare(pkt, mbp,
-						&eth_hdr, vlan_tci,
-						vlan_tci_outer,
-						ol_flags))) {
-			rte_mempool_put(mbp, pkt);
-			break;
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
+				nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put_bulk(mbp,
+						(void **)&pkts_burst[nb_pkt],
+						nb_pkt_per_burst - nb_pkt);
+				break;
+			}
+		}
+	} else {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (pkt == NULL)
+				break;
+			if (unlikely(!pkt_burst_prepare(pkt, mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put(mbp, pkt);
+				break;
+			}
+			pkts_burst[nb_pkt] = pkt;
 		}
-		pkts_burst[nb_pkt] = pkt;
 	}
 
 	if (nb_pkt == 0)
-- 
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread
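
For reference, a minimal standalone sketch of the bulk-get-with-fallback allocation pattern this patch describes, not taken from the patch itself; the burst size, the 64-byte packet length and the helper name prepare_pkt() are illustrative:

#include <stdbool.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#define BURST 32	/* illustrative burst size */

/* Stand-in for testpmd's pkt_burst_prepare(): reset the raw mbuf and
 * fill in minimal single-segment metadata. */
static bool
prepare_pkt(struct rte_mbuf *m)
{
	rte_pktmbuf_reset_headroom(m);	/* raw alloc does not reset data_off */
	m->data_len = 64;		/* illustrative payload length */
	m->pkt_len = 64;
	m->nb_segs = 1;
	m->next = NULL;
	return true;
}

/* Fill pkts[] with up to BURST ready-to-send mbufs; returns how many. */
static uint16_t
alloc_burst(struct rte_mempool *mp, struct rte_mbuf *pkts[BURST])
{
	uint16_t n;

	if (rte_mempool_get_bulk(mp, (void **)pkts, BURST) == 0) {
		/* Bulk get succeeded: prepare in place and return the
		 * still-unused mbufs to the pool if preparation fails. */
		for (n = 0; n < BURST; n++) {
			if (!prepare_pkt(pkts[n])) {
				rte_mempool_put_bulk(mp, (void **)&pkts[n],
						     BURST - n);
				break;
			}
		}
	} else {
		/* Fallback: allocate one mbuf at a time, as before. */
		for (n = 0; n < BURST; n++) {
			pkts[n] = rte_mbuf_raw_alloc(mp);
			if (pkts[n] == NULL)
				break;
			if (!prepare_pkt(pkts[n])) {
				rte_mempool_put(mp, pkts[n]);
				break;
			}
		}
	}
	return n;	/* number of packets ready for rte_eth_tx_burst() */
}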

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
  2019-03-31 13:14   ` [dpdk-dev] [PATCH v5 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
@ 2019-04-01 20:25   ` Ferruh Yigit
  2019-04-01 20:25     ` Ferruh Yigit
  2019-04-01 20:53     ` Thomas Monjalon
  2019-04-02  9:03   ` Ali Alnubani
  3 siblings, 2 replies; 68+ messages in thread
From: Ferruh Yigit @ 2019-04-01 20:25 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	arybchenko, bernard.iremonger, Shahaf Shuler
  Cc: dev

On 3/31/2019 2:14 PM, Pavan Nikhilesh Bhagavatula wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Optimize testpmd txonly mode by
> 1. Moving per packet ethernet header copy above the loop.
> 2. Use bulk ops for allocating segments instead of having a inner loop
> for every segment.
> 
> Also, move the packet prepare logic into a separate function so that it
> can be reused later.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v5 Changes
>  - Remove unnecessary change to struct rte_port *txp (movement). (Bernard)
> 
>  v4 Changes:
>  - Fix packet len calculation.
> 
>  v3 Changes:
>  - Split the patches for easier review. (Thomas)
>  - Remove unnecessary assignments to 0. (Bernard)
> 
>  v2 Changes:
>  - Use bulk ops for fetching segments. (Andrew Rybchenko)
>  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
>  - Fix mbufs not being freed when there is no more mbufs available for
>  segments. (Andrew Rybchenko)

Hi Thomas, Shahafs,

I guess there was a performance issue on Mellanox with this patch. I assume it
is still valid, since this version only has some cosmetic changes, but can you
please confirm?

And what is the next step? Can you guys provide some info to Pavan to solve the
issue, or, even better, a fix?

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-04-01 20:25   ` [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd " Ferruh Yigit
  2019-04-01 20:25     ` Ferruh Yigit
@ 2019-04-01 20:53     ` Thomas Monjalon
  2019-04-01 20:53       ` Thomas Monjalon
  2019-04-02  1:03       ` Jerin Jacob Kollanukkaran
  1 sibling, 2 replies; 68+ messages in thread
From: Thomas Monjalon @ 2019-04-01 20:53 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula
  Cc: dev, Ferruh Yigit, Jerin Jacob Kollanukkaran, arybchenko,
	bernard.iremonger, Shahaf Shuler

01/04/2019 22:25, Ferruh Yigit:
> On 3/31/2019 2:14 PM, Pavan Nikhilesh Bhagavatula wrote:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > 
> > Optimize testpmd txonly mode by
> > 1. Moving per packet ethernet header copy above the loop.
> > 2. Use bulk ops for allocating segments instead of having a inner loop
> > for every segment.
> > 
> > Also, move the packet prepare logic into a separate function so that it
> > can be reused later.
> > 
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >  v5 Changes
> >  - Remove unnecessary change to struct rte_port *txp (movement). (Bernard)
> > 
> >  v4 Changes:
> >  - Fix packet len calculation.
> > 
> >  v3 Changes:
> >  - Split the patches for easier review. (Thomas)
> >  - Remove unnecessary assignments to 0. (Bernard)
> > 
> >  v2 Changes:
> >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
> >  - Fix mbufs not being freed when there is no more mbufs available for
> >  segments. (Andrew Rybchenko)
> 
> Hi Thomas, Shahafs,
> 
> I guess there was a performance issue on Mellanox with this patch, I assume it
> is still valid, since this version only has some cosmetic change, but can you
> please confirm?

We will check it.

> And what is the next step, can you guys provide some info to Pavan to solve the
> issue, or perhaps even better a fix?

Looking at the first patch, there are still 3 changes merged together.
Why not split it even further?

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-04-01 20:53     ` Thomas Monjalon
  2019-04-01 20:53       ` Thomas Monjalon
@ 2019-04-02  1:03       ` Jerin Jacob Kollanukkaran
  2019-04-02  1:03         ` Jerin Jacob Kollanukkaran
  2019-04-02  7:06         ` Thomas Monjalon
  1 sibling, 2 replies; 68+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-04-02  1:03 UTC (permalink / raw)
  To: thomas, Pavan Nikhilesh Bhagavatula
  Cc: shahafs, bernard.iremonger, dev, arybchenko, ferruh.yigit

On Mon, 2019-04-01 at 22:53 +0200, Thomas Monjalon wrote:
> 01/04/2019 22:25, Ferruh Yigit:
> > On 3/31/2019 2:14 PM, Pavan Nikhilesh Bhagavatula wrote:
> > > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > 
> > > Optimize testpmd txonly mode by
> > > 1. Moving per packet ethernet header copy above the loop.
> > > 2. Use bulk ops for allocating segments instead of having a inner
> > > loop
> > > for every segment.
> > > 
> > > Also, move the packet prepare logic into a separate function so
> > > that it
> > > can be reused later.
> > > 
> > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > ---
> > >  v5 Changes
> > >  - Remove unnecessary change to struct rte_port *txp (movement).
> > > (Bernard)
> > > 
> > >  v4 Changes:
> > >  - Fix packet len calculation.
> > > 
> > >  v3 Changes:
> > >  - Split the patches for easier review. (Thomas)
> > >  - Remove unnecessary assignments to 0. (Bernard)
> > > 
> > >  v2 Changes:
> > >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> > >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > > Rybchenko)
> > >  - Fix mbufs not being freed when there is no more mbufs
> > > available for
> > >  segments. (Andrew Rybchenko)
> > 
> > Hi Thomas, Shahafs,
> > 
> > I guess there was a performance issue on Mellanox with this patch,
> > I assume it
> > is still valid, since this version only has some cosmetic change,
> > but can you
> > please confirm?
> 
> We will check it.
> 
> > And what is the next step, can you guys provide some info to Pavan
> > to solve the
> > issue, or perhaps even better a fix?
> 
> Looking at the first patch, there are still 3 changes merged
> together.
> Why not splitting even more?

Splitting further is not an issue, but we should not later start a
thread about squashing the patches back together. What would be interesting
to know is whether there is any performance degradation with the Mellanox
NIC, and if so, why. Based on that, we can craft the patch as you need.


> 
> 
> 

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-04-02  1:03       ` Jerin Jacob Kollanukkaran
  2019-04-02  1:03         ` Jerin Jacob Kollanukkaran
@ 2019-04-02  7:06         ` Thomas Monjalon
  2019-04-02  7:06           ` Thomas Monjalon
  2019-04-02  8:31           ` Jerin Jacob Kollanukkaran
  1 sibling, 2 replies; 68+ messages in thread
From: Thomas Monjalon @ 2019-04-02  7:06 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran
  Cc: Pavan Nikhilesh Bhagavatula, shahafs, bernard.iremonger, dev,
	arybchenko, ferruh.yigit

02/04/2019 03:03, Jerin Jacob Kollanukkaran:
> On Mon, 2019-04-01 at 22:53 +0200, Thomas Monjalon wrote:
> > 01/04/2019 22:25, Ferruh Yigit:
> > > On 3/31/2019 2:14 PM, Pavan Nikhilesh Bhagavatula wrote:
> > > > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > > 
> > > > Optimize testpmd txonly mode by
> > > > 1. Moving per packet ethernet header copy above the loop.
> > > > 2. Use bulk ops for allocating segments instead of having a inner
> > > > loop
> > > > for every segment.
> > > > 
> > > > Also, move the packet prepare logic into a separate function so
> > > > that it
> > > > can be reused later.
> > > > 
> > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > > ---
> > > >  v5 Changes
> > > >  - Remove unnecessary change to struct rte_port *txp (movement).
> > > > (Bernard)
> > > > 
> > > >  v4 Changes:
> > > >  - Fix packet len calculation.
> > > > 
> > > >  v3 Changes:
> > > >  - Split the patches for easier review. (Thomas)
> > > >  - Remove unnecessary assignments to 0. (Bernard)
> > > > 
> > > >  v2 Changes:
> > > >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> > > >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > > > Rybchenko)
> > > >  - Fix mbufs not being freed when there is no more mbufs
> > > > available for
> > > >  segments. (Andrew Rybchenko)
> > > 
> > > Hi Thomas, Shahafs,
> > > 
> > > I guess there was a performance issue on Mellanox with this patch,
> > > I assume it
> > > is still valid, since this version only has some cosmetic change,
> > > but can you
> > > please confirm?
> > 
> > We will check it.
> > 
> > > And what is the next step, can you guys provide some info to Pavan
> > > to solve the
> > > issue, or perhaps even better a fix?
> > 
> > Looking at the first patch, there are still 3 changes merged
> > together.
> > Why not splitting even more?
> 
> Splitting further more is not a issue. But we should not start the
> thread for squashing it patch latter. What would be interesting to know
> if there is any performance degradation with Mellanox NIC? If so, Why?
> Based on that, We can craft the patch as you need.

Regarding the Mellanox degradation, we need to check whether it comes from this patch.

Unrelated to the performance degradation, it is good practice
to split the logical changes of a rework.

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-04-02  7:06         ` Thomas Monjalon
  2019-04-02  7:06           ` Thomas Monjalon
@ 2019-04-02  8:31           ` Jerin Jacob Kollanukkaran
  2019-04-02  8:31             ` Jerin Jacob Kollanukkaran
  1 sibling, 1 reply; 68+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-04-02  8:31 UTC (permalink / raw)
  To: thomas
  Cc: shahafs, bernard.iremonger, dev, Pavan Nikhilesh Bhagavatula,
	arybchenko, ferruh.yigit

On Tue, 2019-04-02 at 09:06 +0200, Thomas Monjalon wrote:
> 02/04/2019 03:03, Jerin Jacob Kollanukkaran:
> > On Mon, 2019-04-01 at 22:53 +0200, Thomas Monjalon wrote:
> > > 01/04/2019 22:25, Ferruh Yigit:
> > > > On 3/31/2019 2:14 PM, Pavan Nikhilesh Bhagavatula wrote:
> > > > > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > > > 
> > > > > Optimize testpmd txonly mode by
> > > > > 1. Moving per packet ethernet header copy above the loop.
> > > > > 2. Use bulk ops for allocating segments instead of having a
> > > > > inner
> > > > > loop
> > > > > for every segment.
> > > > > 
> > > > > Also, move the packet prepare logic into a separate function
> > > > > so
> > > > > that it
> > > > > can be reused later.
> > > > > 
> > > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > > > ---
> > > > >  v5 Changes
> > > > >  - Remove unnecessary change to struct rte_port *txp
> > > > > (movement).
> > > > > (Bernard)
> > > > > 
> > > > >  v4 Changes:
> > > > >  - Fix packet len calculation.
> > > > > 
> > > > >  v3 Changes:
> > > > >  - Split the patches for easier review. (Thomas)
> > > > >  - Remove unnecessary assignments to 0. (Bernard)
> > > > > 
> > > > >  v2 Changes:
> > > > >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> > > > >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > > > > Rybchenko)
> > > > >  - Fix mbufs not being freed when there is no more mbufs
> > > > > available for
> > > > >  segments. (Andrew Rybchenko)
> > > > 
> > > > Hi Thomas, Shahafs,
> > > > 
> > > > I guess there was a performance issue on Mellanox with this
> > > > patch,
> > > > I assume it
> > > > is still valid, since this version only has some cosmetic
> > > > change,
> > > > but can you
> > > > please confirm?
> > > 
> > > We will check it.
> > > 
> > > > And what is the next step, can you guys provide some info to
> > > > Pavan
> > > > to solve the
> > > > issue, or perhaps even better a fix?
> > > 
> > > Looking at the first patch, there are still 3 changes merged
> > > together.
> > > Why not splitting even more?
> > 
> > Splitting further more is not a issue. But we should not start the
> > thread for squashing it patch latter. What would be interesting to
> > know
> > if there is any performance degradation with Mellanox NIC? If so,
> > Why?
> > Based on that, We can craft the patch as you need.
> 
> Regarding Mellanox degradation, we need to check if this is in this
> patch.

Please check.

> 
> Not related to performance degradation, it is a good practice
> to split logical changes of a rework.

Yes, no disagreement here. What I would like to emphasize here is that
it is critical to know whether there is any degradation, as this patch has
been blocked on the claim that there is a degradation with the Mellanox NIC.

It is trivial to split the patches into N more logical ones. The former is
the complex part, and it has a dependency.




> 
> 

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
                     ` (2 preceding siblings ...)
  2019-04-01 20:25   ` [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd " Ferruh Yigit
@ 2019-04-02  9:03   ` Ali Alnubani
  2019-04-02  9:03     ` Ali Alnubani
  2019-04-02  9:06     ` Pavan Nikhilesh Bhagavatula
  3 siblings, 2 replies; 68+ messages in thread
From: Ali Alnubani @ 2019-04-02  9:03 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula
  Cc: dev, Jerin Jacob Kollanukkaran, Thomas Monjalon, arybchenko,
	ferruh.yigit, bernard.iremonger

Hi Pavan,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Pavan Nikhilesh
> Bhagavatula
> Sent: Sunday, March 31, 2019 4:14 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Thomas Monjalon
> <thomas@monjalon.net>; arybchenko@solarflare.com;
> ferruh.yigit@intel.com; bernard.iremonger@intel.com
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly
> mode
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Optimize testpmd txonly mode by
> 1. Moving per packet ethernet header copy above the loop.
> 2. Use bulk ops for allocating segments instead of having a inner loop for
> every segment.
> 
> Also, move the packet prepare logic into a separate function so that it can be
> reused later.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v5 Changes
>  - Remove unnecessary change to struct rte_port *txp (movement).
> (Bernard)
> 
>  v4 Changes:
>  - Fix packet len calculation.
> 
>  v3 Changes:
>  - Split the patches for easier review. (Thomas)
>  - Remove unnecessary assignments to 0. (Bernard)
> 
>  v2 Changes:
>  - Use bulk ops for fetching segments. (Andrew Rybchenko)
>  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
>  - Fix mbufs not being freed when there is no more mbufs available for
> segments. (Andrew Rybchenko)
> 
>  app/test-pmd/txonly.c | 139 +++++++++++++++++++++++-------------------
>  1 file changed, 76 insertions(+), 63 deletions(-)
> 

The patch doesn't apply. It conflicts with 82010ef55 app/testpmd: make txonly mode generate multiple flows (http://patches.dpdk.org/patch/51869/).
Can you please rebase?

Thanks,
Ali

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd txonly mode
  2019-04-02  9:03   ` Ali Alnubani
  2019-04-02  9:03     ` Ali Alnubani
@ 2019-04-02  9:06     ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:06       ` Pavan Nikhilesh Bhagavatula
  1 sibling, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-02  9:06 UTC (permalink / raw)
  To: Ali Alnubani
  Cc: dev, Jerin Jacob Kollanukkaran, Thomas Monjalon, arybchenko,
	ferruh.yigit, bernard.iremonger

Hi Ali,

> -----Original Message-----
> From: Ali Alnubani <alialnu@mellanox.com>
> Sent: Tuesday, April 2, 2019 2:33 PM
> To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Thomas
> Monjalon <thomas@monjalon.net>; arybchenko@solarflare.com;
> ferruh.yigit@intel.com; bernard.iremonger@intel.com
> Subject: [EXT] RE: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd
> txonly mode
> 
> External Email
> 
> ----------------------------------------------------------------------
> Hi Pavan,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Pavan Nikhilesh
> > Bhagavatula
> > Sent: Sunday, March 31, 2019 4:14 PM
> > To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Thomas Monjalon
> > <thomas@monjalon.net>; arybchenko@solarflare.com;
> > ferruh.yigit@intel.com; bernard.iremonger@intel.com
> > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> > <pbhagavatula@marvell.com>
> > Subject: [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd
> > txonly mode
> >
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Optimize testpmd txonly mode by
> > 1. Moving per packet ethernet header copy above the loop.
> > 2. Use bulk ops for allocating segments instead of having a inner loop
> > for every segment.
> >
> > Also, move the packet prepare logic into a separate function so that
> > it can be reused later.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >  v5 Changes
> >  - Remove unnecessary change to struct rte_port *txp (movement).
> > (Bernard)
> >
> >  v4 Changes:
> >  - Fix packet len calculation.
> >
> >  v3 Changes:
> >  - Split the patches for easier review. (Thomas)
> >  - Remove unnecessary assignments to 0. (Bernard)
> >
> >  v2 Changes:
> >  - Use bulk ops for fetching segments. (Andrew Rybchenko)
> >  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew
> > Rybchenko)
> >  - Fix mbufs not being freed when there is no more mbufs available for
> > segments. (Andrew Rybchenko)
> >
> >  app/test-pmd/txonly.c | 139
> > +++++++++++++++++++++++-------------------
> >  1 file changed, 76 insertions(+), 63 deletions(-)
> >
> 
> The patch doesn't apply. It conflicts with 82010ef55 app/testpmd: make
> txonly mode generate multiple flows
> (http://patches.dpdk.org/patch/51869/).
> Can you please rebase?

Yes, I'm in the process of rebasing/splitting the patch further.

> 
> Thanks,
> Ali

Regards,
Pavan.

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop
  2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
                   ` (4 preceding siblings ...)
  2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
@ 2019-04-02  9:53 ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
                     ` (6 more replies)
  5 siblings, 7 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-02  9:53 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger, alialnu
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Testpmd txonly copies the src/dst MAC addresses of the port being
processed into the Ethernet header structure on the stack for every packet.
Move this copy outside the loop and reuse the header.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v6 Changes
 - Rebase onto ToT.
 - Split the changes further

 v5 Changes
 - Remove unnecessary change to struct rte_port *txp (movement). (Bernard)

 v4 Changes:
 - Fix packet len calculation.

 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there is no more mbufs available for
 segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index def52a048..0d411dbf4 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -190,6 +190,14 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
 		if (pkt == NULL) {
@@ -226,13 +234,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		}
 		pkt_seg->next = NULL; /* Last segment of packet. */

-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
 		/*
 		 * Copy headers in first packet segment(s).
 		 */
--
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread
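
A minimal standalone sketch of the hoisting this patch performs: the Ethernet header depends only on the ports, not on the individual packet, so it is built once per burst and then copied into each packet. Not part of the patch; the MAC-address globals and copy_into_pkt() are illustrative stand-ins for testpmd's peer_eth_addrs/ports[] and copy_buf_to_pkt(), using the pre-19.08 struct and macro names seen in this thread:

#include <rte_ether.h>
#include <rte_byteorder.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>

static struct ether_addr dst_mac;	/* illustrative: peer MAC address */
static struct ether_addr src_mac;	/* illustrative: TX port MAC address */

/* Copy 'len' bytes at offset 'off' of the first segment (assumes it fits). */
static void
copy_into_pkt(const void *buf, unsigned int len, struct rte_mbuf *pkt,
	      unsigned int off)
{
	rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, off), buf, len);
}

static void
fill_eth_headers(struct rte_mbuf *pkts[], uint16_t nb)
{
	struct ether_hdr eth_hdr;
	uint16_t i;

	/* Built once per burst instead of once per packet. */
	ether_addr_copy(&dst_mac, &eth_hdr.d_addr);
	ether_addr_copy(&src_mac, &eth_hdr.s_addr);
	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);

	for (i = 0; i < nb; i++)
		copy_into_pkt(&eth_hdr, sizeof(eth_hdr), pkts[i], 0);
}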

* [dpdk-dev] [PATCH v6 2/4] app/testpmd: use bulk ops for allocating segments
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
@ 2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function Pavan Nikhilesh Bhagavatula
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-02  9:53 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger, alialnu
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use bulk ops for allocating segments instead of having an inner loop
for every segment.
This reduces the number of calls to the mempool layer.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 0d411dbf4..65171c1d1 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -155,6 +155,7 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
 	struct rte_port *txp;
 	struct rte_mbuf *pkt;
 	struct rte_mbuf *pkt_seg;
@@ -216,18 +217,23 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		rte_pktmbuf_reset_headroom(pkt);
 		pkt->data_len = tx_pkt_seg_lengths[0];
 		pkt_seg = pkt;
+
 		if (tx_pkt_split == TX_PKT_SPLIT_RND)
 			nb_segs = random() % tx_pkt_nb_segs + 1;
 		else
 			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
+
+		if (nb_segs > 1) {
+			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
+							nb_segs)) {
 				rte_pktmbuf_free(pkt);
 				goto nomore_mbuf;
 			}
+		}
+
+		pkt_len = pkt->data_len;
+		for (i = 1; i < nb_segs; i++) {
+			pkt_seg->next = pkt_segs[i - 1];
 			pkt_seg = pkt_seg->next;
 			pkt_seg->data_len = tx_pkt_seg_lengths[i];
 			pkt_len += pkt_seg->data_len;
-- 
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread
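
A standalone sketch of the segment handling above: one rte_mempool_get_bulk() call replaces the per-segment rte_mbuf_raw_alloc() loop, and the extra segments are then linked in a tight loop. Not part of the patch; NB_SEGS and seg_len are illustrative, and error handling is reduced to freeing the head mbuf:

#include <rte_mbuf.h>
#include <rte_mempool.h>

#define NB_SEGS 4	/* illustrative segments per packet */

/* Chain (NB_SEGS - 1) extra segments onto 'head'; returns 0 on success. */
static int
chain_segments(struct rte_mempool *mp, struct rte_mbuf *head, uint16_t seg_len)
{
	struct rte_mbuf *segs[NB_SEGS];
	struct rte_mbuf *tail = head;
	uint32_t pkt_len = head->data_len;
	int i;

	/* One mempool call for all extra segments instead of one per segment. */
	if (rte_mempool_get_bulk(mp, (void **)segs, NB_SEGS - 1) != 0) {
		rte_pktmbuf_free(head);
		return -1;
	}

	for (i = 0; i < NB_SEGS - 1; i++) {
		tail->next = segs[i];
		tail = tail->next;
		tail->data_len = seg_len;
		pkt_len += seg_len;
	}
	tail->next = NULL;		/* terminate the chain */
	head->nb_segs = NB_SEGS;
	head->pkt_len = pkt_len;
	return 0;
}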

* [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 2/4] app/testpmd: use bulk ops for allocating segments Pavan Nikhilesh Bhagavatula
@ 2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
  2019-04-09  9:28     ` Lin, Xueqin
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 4/4] app/testpmd: add mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
                     ` (3 subsequent siblings)
  6 siblings, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-02  9:53 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger, alialnu
  Cc: dev, Pavan Nikhilesh Bhagavatula

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Move the packet prepare logic into a separate function so that it
can be reused later.
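
After the refactor, the per-packet body of the transmit loop reduces to
roughly the following sketch (condensed from the diff below):

	pkt = rte_mbuf_raw_alloc(mbp);
	if (pkt == NULL)
		break;
	if (!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
			       vlan_tci_outer, ol_flags)) {
		rte_pktmbuf_free(pkt);	/* prepare failed, stop the burst */
		break;
	}
	pkts_burst[nb_pkt] = pkt;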

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 163 +++++++++++++++++++++---------------------
 1 file changed, 83 insertions(+), 80 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 65171c1d1..56ca0ad24 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -148,6 +148,80 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }
 
+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_len = pkt->data_len;
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	if (txonly_multi_flow) {
+		struct ipv4_hdr *ip_hdr;
+		uint32_t addr;
+
+		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
+				struct ipv4_hdr *,
+				sizeof(struct ether_hdr));
+		/*
+		 * Generate multiple flows by varying IP src addr. This
+		 * enables packets are well distributed by RSS in
+		 * receiver side if any and txonly mode can be a decent
+		 * packet generator for developer's quick performance
+		 * regression test.
+		 */
+		addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
+		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
+	}
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len = pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -155,10 +229,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
 	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -166,15 +238,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
-	uint8_t  i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -201,85 +270,19 @@ pkt_burst_transmit(struct fwd_stream *fs)
 
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
+						vlan_tci_outer, ol_flags))) {
+			rte_pktmbuf_free(pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-
-		if (nb_segs > 1) {
-			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
-							nb_segs)) {
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-		}
-
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = pkt_segs[i - 1];
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		if (txonly_multi_flow) {
-			struct ipv4_hdr *ip_hdr;
-			uint32_t addr;
-
-			ip_hdr = rte_pktmbuf_mtod_offset(pkt,
-					struct ipv4_hdr *,
-					sizeof(struct ether_hdr));
-			/*
-			 * Generate multiple flows by varying IP src addr. This
-			 * enables packets are well distributed by RSS in
-			 * receiver side if any and txonly mode can be a decent
-			 * packet generator for developer's quick performance
-			 * regression test.
-			 */
-			addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
-			ip_hdr->src_addr = rte_cpu_to_be_32(addr);
-		}
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
-- 
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread


* [dpdk-dev] [PATCH v6 4/4] app/testpmd: add mempool bulk get for txonly mode
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
                     ` (2 preceding siblings ...)
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function Pavan Nikhilesh Bhagavatula
@ 2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
  2019-04-02 15:21   ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Raslan Darawsheh
                     ` (2 subsequent siblings)
  6 siblings, 1 reply; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-02  9:53 UTC (permalink / raw)
  To: Jerin Jacob Kollanukkaran, thomas, arybchenko, ferruh.yigit,
	bernard.iremonger, alialnu
  Cc: dev, Pavan Nikhilesh Bhagavatula, Yingya Han

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use mempool bulk get ops to allocate a burst of packets and process them.
If the bulk get fails, fall back to rte_mbuf_raw_alloc.
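
In outline, the transmit loop becomes the following sketch (condensed from
the diff below; pkt_burst_prepare() is the helper factored out earlier in
this series):

	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
				 nb_pkt_per_burst) == 0) {
		/* Fast path: the whole burst came from one mempool call. */
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			if (!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
					       &eth_hdr, vlan_tci,
					       vlan_tci_outer, ol_flags)) {
				/* Return the unused tail of the allocation. */
				rte_mempool_put_bulk(mbp,
						(void **)&pkts_burst[nb_pkt],
						nb_pkt_per_burst - nb_pkt);
				break;
			}
		}
	} else {
		/* Fallback: allocate and prepare packets one by one. */
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			pkt = rte_mbuf_raw_alloc(mbp);
			if (pkt == NULL)
				break;
			if (!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
					       vlan_tci_outer, ol_flags)) {
				rte_pktmbuf_free(pkt);
				break;
			}
			pkts_burst[nb_pkt] = pkt;
		}
	}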

Tested-by: Yingya Han <yingyax.han@intel.com>
Suggested-by: Andrew Rybchenko <arybchenko@solarflare.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-pmd/txonly.c | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 56ca0ad24..66e63788a 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -268,16 +268,33 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
 	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
 
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL)
-			break;
-		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
-						vlan_tci_outer, ol_flags))) {
-			rte_pktmbuf_free(pkt);
-			break;
+	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
+				nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
+							&eth_hdr, vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_mempool_put_bulk(mbp,
+						(void **)&pkts_burst[nb_pkt],
+						nb_pkt_per_burst - nb_pkt);
+				break;
+			}
+		}
+	} else {
+		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (pkt == NULL)
+				break;
+			if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr,
+							vlan_tci,
+							vlan_tci_outer,
+							ol_flags))) {
+				rte_pktmbuf_free(pkt);
+				break;
+			}
+			pkts_burst[nb_pkt] = pkt;
 		}
-		pkts_burst[nb_pkt] = pkt;
 	}
 
 	if (nb_pkt == 0)
-- 
2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread


* Re: [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
                     ` (3 preceding siblings ...)
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 4/4] app/testpmd: add mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
@ 2019-04-02 15:21   ` Raslan Darawsheh
  2019-04-02 15:21     ` Raslan Darawsheh
  2019-04-04 16:23   ` Ferruh Yigit
  2019-04-05 17:37   ` Ferruh Yigit
  6 siblings, 1 reply; 68+ messages in thread
From: Raslan Darawsheh @ 2019-04-02 15:21 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
	Thomas Monjalon, arybchenko, ferruh.yigit, bernard.iremonger,
	Ali Alnubani
  Cc: dev

Hi,

The performance issue that we saw on Mellanox NICs is now fixed with the latest version:

The issue was with the packet length calculation, which had changed in the versions after v4: pkt_len was no longer seeded with the first segment's data_len, and the total was added to pkt->pkt_len instead of assigned to it:
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index af0be89..2f40949 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -176,6 +176,7 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
        pkt->l3_len = sizeof(struct ipv4_hdr);

        pkt_seg = pkt;
+       pkt_len = pkt->data_len;
        for (i = 1; i < nb_segs; i++) {
                pkt_seg->next = pkt_segs[i - 1];
                pkt_seg = pkt_seg->next;
@@ -198,7 +199,7 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
         * burst of packets to be transmitted.
         */
        pkt->nb_segs = nb_segs;
-       pkt->pkt_len += pkt_len;
+       pkt->pkt_len = pkt_len;

        return true;

and now we see ~20% improvement in performance.

Kindest regards,
Raslan Darawsheh

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Pavan Nikhilesh
> Bhagavatula
> Sent: Tuesday, April 2, 2019 12:53 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Thomas Monjalon
> <thomas@monjalon.net>; arybchenko@solarflare.com;
> ferruh.yigit@intel.com; bernard.iremonger@intel.com; Ali Alnubani
> <alialnu@mellanox.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header
> generation outside the loop
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Testpmd txonly copies the src/dst mac address of the port being
> processed to ethernet header structure on the stack for every packet.
> Move it outside the loop and reuse it.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v6 Changes
>  - Rebase onto ToT.
>  - Split the changes further
> 
>  v5 Changes
>  - Remove unnecessary change to struct rte_port *txp (movement).
> (Bernard)
> 
>  v4 Changes:
>  - Fix packet len calculation.
> 
>  v3 Changes:
>  - Split the patches for easier review. (Thomas)
>  - Remove unnecessary assignments to 0. (Bernard)
> 
>  v2 Changes:
>  - Use bulk ops for fetching segments. (Andrew Rybchenko)
>  - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
>  - Fix mbufs not being freed when there is no more mbufs available for
>  segments. (Andrew Rybchenko)
> 
>  app/test-pmd/txonly.c | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index def52a048..0d411dbf4 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -190,6 +190,14 @@ pkt_burst_transmit(struct fwd_stream *fs)
>  		ol_flags |= PKT_TX_QINQ_PKT;
>  	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
>  		ol_flags |= PKT_TX_MACSEC;
> +
> +	/*
> +	 * Initialize Ethernet header.
> +	 */
> +	ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
> &eth_hdr.d_addr);
> +	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> +	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> +
>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
>  		pkt = rte_mbuf_raw_alloc(mbp);
>  		if (pkt == NULL) {
> @@ -226,13 +234,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
>  		}
>  		pkt_seg->next = NULL; /* Last segment of packet. */
> 
> -		/*
> -		 * Initialize Ethernet header.
> -		 */
> -		ether_addr_copy(&peer_eth_addrs[fs-
> >peer_addr],&eth_hdr.d_addr);
> -		ether_addr_copy(&ports[fs->tx_port].eth_addr,
> &eth_hdr.s_addr);
> -		eth_hdr.ether_type =
> rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> -
>  		/*
>  		 * Copy headers in first packet segment(s).
>  		 */
> --
> 2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread


* Re: [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
                     ` (4 preceding siblings ...)
  2019-04-02 15:21   ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Raslan Darawsheh
@ 2019-04-04 16:23   ` Ferruh Yigit
  2019-04-04 16:23     ` Ferruh Yigit
  2019-04-05 17:37   ` Ferruh Yigit
  6 siblings, 1 reply; 68+ messages in thread
From: Ferruh Yigit @ 2019-04-04 16:23 UTC (permalink / raw)
  To: bernard.iremonger, Jingjing Wu, Wenzhuo Lu
  Cc: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	arybchenko, alialnu, dev

On 4/2/2019 10:53 AM, Pavan Nikhilesh Bhagavatula wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Testpmd txonly copies the src/dst mac address of the port being
> processed to ethernet header structure on the stack for every packet.
> Move it outside the loop and reuse it.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Hi Bernard, Jingjing, Wenzhuo,

Can you please check this testpmd patchset?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 68+ messages in thread


* Re: [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop
  2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
                     ` (5 preceding siblings ...)
  2019-04-04 16:23   ` Ferruh Yigit
@ 2019-04-05 17:37   ` Ferruh Yigit
  2019-04-05 17:37     ` Ferruh Yigit
  6 siblings, 1 reply; 68+ messages in thread
From: Ferruh Yigit @ 2019-04-05 17:37 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	arybchenko, bernard.iremonger, alialnu
  Cc: dev

On 4/2/2019 10:53 AM, Pavan Nikhilesh Bhagavatula wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Testpmd txonly copies the src/dst mac address of the port being
> processed to ethernet header structure on the stack for every packet.
> Move it outside the loop and reuse it.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

For series,
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 68+ messages in thread


* Re: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function Pavan Nikhilesh Bhagavatula
  2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
@ 2019-04-09  9:28     ` Lin, Xueqin
  2019-04-09  9:28       ` Lin, Xueqin
  2019-04-09  9:32       ` Pavan Nikhilesh Bhagavatula
  1 sibling, 2 replies; 68+ messages in thread
From: Lin, Xueqin @ 2019-04-09  9:28 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Yigit, Ferruh
  Cc: dev, Xu, Qian Q, Li, WenjieX A, Wang, FengqinX, Yao, Lei A, Wang,
	Yinan, Jerin Jacob Kollanukkaran, thomas, arybchenko, Iremonger,
	Bernard, alialnu, Zhang, Qi Z

Hi Nikhilesh,

This patchset impacts some of the 19.05-rc1 txonly/burst tests on Intel NICs. With txonly forwarding set, IXIA or the tester peer cannot receive the packets generated by the app.
This is a high-priority issue and blocks several test cases. Detailed information is below; please check it soon.

*DPDK version:  19.05.0-rc1
*NIC hardware: Fortville_eagle/Fortville_spirit/Niantic
Environment: one NIC port connected to another NIC port, or one NIC port connected to IXIA

Test Setup
1. Bind port to igb_uio or vfio
2. On DUT, setup testpmd:
    ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x1e -n 4  -- -i --rxq=4 --txq=4 --port-topology=loop
3. Set txonly forward, start testpmd
testpmd>set fwd txonly
testpmd>start
4. Dump packets on the tester NIC port or IXIA; no packets are received on PORT0.
tcpdump -i <tester_interface> -v

Best regards,
Xueqin

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> Bhagavatula
> Sent: Tuesday, April 2, 2019 5:54 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> thomas@monjalon.net; arybchenko@solarflare.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Iremonger, Bernard
> <bernard.iremonger@intel.com>; alialnu@mellanox.com
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
> into a separate function
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Move the packet prepare logic into a separate function so that it can be
> reused later.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  app/test-pmd/txonly.c | 163 +++++++++++++++++++++---------------------
>  1 file changed, 83 insertions(+), 80 deletions(-)
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> 65171c1d1..56ca0ad24 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -148,6 +148,80 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
>  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;  }
> 
> +static inline bool
> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
> +	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
> +	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> +	struct rte_mbuf *pkt_seg;
> +	uint32_t nb_segs, pkt_len;
> +	uint8_t i;
> +
> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> +		nb_segs = random() % tx_pkt_nb_segs + 1;
> +	else
> +		nb_segs = tx_pkt_nb_segs;
> +
> +	if (nb_segs > 1) {
> +		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
> +			return false;
> +	}
> +
> +	rte_pktmbuf_reset_headroom(pkt);
> +	pkt->data_len = tx_pkt_seg_lengths[0];
> +	pkt->ol_flags = ol_flags;
> +	pkt->vlan_tci = vlan_tci;
> +	pkt->vlan_tci_outer = vlan_tci_outer;
> +	pkt->l2_len = sizeof(struct ether_hdr);
> +	pkt->l3_len = sizeof(struct ipv4_hdr);
> +
> +	pkt_len = pkt->data_len;
> +	pkt_seg = pkt;
> +	for (i = 1; i < nb_segs; i++) {
> +		pkt_seg->next = pkt_segs[i - 1];
> +		pkt_seg = pkt_seg->next;
> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
> +		pkt_len += pkt_seg->data_len;
> +	}
> +	pkt_seg->next = NULL; /* Last segment of packet. */
> +	/*
> +	 * Copy headers in first packet segment(s).
> +	 */
> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> +			sizeof(struct ether_hdr));
> +	if (txonly_multi_flow) {
> +		struct ipv4_hdr *ip_hdr;
> +		uint32_t addr;
> +
> +		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> +				struct ipv4_hdr *,
> +				sizeof(struct ether_hdr));
> +		/*
> +		 * Generate multiple flows by varying IP src addr. This
> +		 * enables packets are well distributed by RSS in
> +		 * receiver side if any and txonly mode can be a decent
> +		 * packet generator for developer's quick performance
> +		 * regression test.
> +		 */
> +		addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
> +		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> +	}
> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +			sizeof(struct ether_hdr) +
> +			sizeof(struct ipv4_hdr));
> +	/*
> +	 * Complete first mbuf of packet and append it to the
> +	 * burst of packets to be transmitted.
> +	 */
> +	pkt->nb_segs = nb_segs;
> +	pkt->pkt_len = pkt_len;
> +
> +	return true;
> +}
> +
>  /*
>   * Transmit a burst of multi-segments packets.
>   */
> @@ -155,10 +229,8 @@ static void
>  pkt_burst_transmit(struct fwd_stream *fs)  {
>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> -	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
>  	struct rte_port *txp;
>  	struct rte_mbuf *pkt;
> -	struct rte_mbuf *pkt_seg;
>  	struct rte_mempool *mbp;
>  	struct ether_hdr eth_hdr;
>  	uint16_t nb_tx;
> @@ -166,15 +238,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
>  	uint16_t vlan_tci, vlan_tci_outer;
>  	uint32_t retry;
>  	uint64_t ol_flags = 0;
> -	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> -	uint8_t  i;
>  	uint64_t tx_offloads;
>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>  	uint64_t start_tsc;
>  	uint64_t end_tsc;
>  	uint64_t core_cycles;
>  #endif
> -	uint32_t nb_segs, pkt_len;
> 
>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>  	start_tsc = rte_rdtsc();
> @@ -201,85 +270,19 @@ pkt_burst_transmit(struct fwd_stream *fs)
> 
>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
>  		pkt = rte_mbuf_raw_alloc(mbp);
> -		if (pkt == NULL) {
> -		nomore_mbuf:
> -			if (nb_pkt == 0)
> -				return;
> +		if (pkt == NULL)
> +			break;
> +		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
> +						vlan_tci_outer, ol_flags))) {
> +			rte_pktmbuf_free(pkt);
>  			break;
>  		}
> -
> -		/*
> -		 * Using raw alloc is good to improve performance,
> -		 * but some consumers may use the headroom and so
> -		 * decrement data_off. We need to make sure it is
> -		 * reset to default value.
> -		 */
> -		rte_pktmbuf_reset_headroom(pkt);
> -		pkt->data_len = tx_pkt_seg_lengths[0];
> -		pkt_seg = pkt;
> -
> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
> -			nb_segs = random() % tx_pkt_nb_segs + 1;
> -		else
> -			nb_segs = tx_pkt_nb_segs;
> -
> -		if (nb_segs > 1) {
> -			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
> -							nb_segs)) {
> -				rte_pktmbuf_free(pkt);
> -				goto nomore_mbuf;
> -			}
> -		}
> -
> -		pkt_len = pkt->data_len;
> -		for (i = 1; i < nb_segs; i++) {
> -			pkt_seg->next = pkt_segs[i - 1];
> -			pkt_seg = pkt_seg->next;
> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
> -			pkt_len += pkt_seg->data_len;
> -		}
> -		pkt_seg->next = NULL; /* Last segment of packet. */
> -
> -		/*
> -		 * Copy headers in first packet segment(s).
> -		 */
> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> -				sizeof(struct ether_hdr));
> -		if (txonly_multi_flow) {
> -			struct ipv4_hdr *ip_hdr;
> -			uint32_t addr;
> -
> -			ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> -					struct ipv4_hdr *,
> -					sizeof(struct ether_hdr));
> -			/*
> -			 * Generate multiple flows by varying IP src addr. This
> -			 * enables packets are well distributed by RSS in
> -			 * receiver side if any and txonly mode can be a
> decent
> -			 * packet generator for developer's quick
> performance
> -			 * regression test.
> -			 */
> -			addr = (IP_DST_ADDR | (ip_var++ << 8)) +
> rte_lcore_id();
> -			ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> -		}
> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> -				sizeof(struct ether_hdr) +
> -				sizeof(struct ipv4_hdr));
> -
> -		/*
> -		 * Complete first mbuf of packet and append it to the
> -		 * burst of packets to be transmitted.
> -		 */
> -		pkt->nb_segs = nb_segs;
> -		pkt->pkt_len = pkt_len;
> -		pkt->ol_flags = ol_flags;
> -		pkt->vlan_tci = vlan_tci;
> -		pkt->vlan_tci_outer = vlan_tci_outer;
> -		pkt->l2_len = sizeof(struct ether_hdr);
> -		pkt->l3_len = sizeof(struct ipv4_hdr);
>  		pkts_burst[nb_pkt] = pkt;
>  	}
> +
> +	if (nb_pkt == 0)
> +		return;
> +
>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> nb_pkt);
>  	/*
>  	 * Retry if necessary
> --
> 2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread


* Re: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-09  9:28     ` Lin, Xueqin
  2019-04-09  9:28       ` Lin, Xueqin
@ 2019-04-09  9:32       ` Pavan Nikhilesh Bhagavatula
  2019-04-09  9:32         ` Pavan Nikhilesh Bhagavatula
  2019-04-09 12:24         ` Yao, Lei A
  1 sibling, 2 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-09  9:32 UTC (permalink / raw)
  To: Lin, Xueqin, Yigit, Ferruh
  Cc: dev, Xu, Qian Q, Li, WenjieX A, Wang, FengqinX, Yao, Lei A, Wang,
	Yinan, Jerin Jacob Kollanukkaran, thomas, arybchenko, Iremonger,
	Bernard, alialnu, Zhang, Qi Z

Hi Lin,

Can you check if the following patch fixes the issue?
http://patches.dpdk.org/patch/52395/

I wasn't able to catch this earlier.
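
For reference, the symptom is consistent with the Ethernet header copy in
the refactored pkt_burst_prepare(): eth_hdr is now a pointer parameter, so
sizeof(eth_hdr) copies only pointer-size bytes of the 14-byte header. The
linked patch is not reproduced here; as an assumption, the correction would
look like:

-	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+	copy_buf_to_pkt(eth_hdr, sizeof(struct ether_hdr), pkt, 0);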

Regards,
Pavan.

>-----Original Message-----
>From: Lin, Xueqin <xueqin.lin@intel.com>
>Sent: Tuesday, April 9, 2019 2:58 PM
>To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>; Yigit, Ferruh
><ferruh.yigit@intel.com>
>Cc: dev@dpdk.org; Xu, Qian Q <qian.q.xu@intel.com>; Li, WenjieX A
><wenjiex.a.li@intel.com>; Wang, FengqinX <fengqinx.wang@intel.com>; Yao,
>Lei A <lei.a.yao@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jerin Jacob
>Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
>arybchenko@solarflare.com; Iremonger, Bernard
><bernard.iremonger@intel.com>; alialnu@mellanox.com; Zhang, Qi Z
><qi.z.zhang@intel.com>
>Subject: [EXT] RE: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare
>logic into a separate function
>
>External Email
>
>----------------------------------------------------------------------
>Hi NIkhilesh,
>
>This patchset impacts some of 19.05 rc1 txonly/burst tests on Intel NIC. If set
>txonly fwd, IXIA or tester peer can't receive packets that sent from app
>generated.
>This is high issue, block some cases test. Detailed information as below, need
>you to check it soon.
>
>*DPDK version:  19.05.0-rc1
>*NIC hardware: Fortville_eagle/Fortville_spirit/Niantic
>Environment: one NIC port connect with another NIC port, or one NIC port
>connect with IXIA
>
>Test Setup
>1. Bind port to igb_uio or vfio
>2. On DUT, setup testpmd:
>    ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x1e -n 4  -- -i --rxq=4 --txq=4 --
>port-topology=loop 3. Set txonly forward, start testpmd
>testpmd>set fwd txonly
>testpmd>start
>4. Dump packets from tester NIC port or IXIA, find no packets were received on
>the PORT0.
>tcpdump -i <tester_interface> -v
>
>Best regards,
>Xueqin
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
>> Bhagavatula
>> Sent: Tuesday, April 2, 2019 5:54 PM
>> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
>> thomas@monjalon.net; arybchenko@solarflare.com; Yigit, Ferruh
>> <ferruh.yigit@intel.com>; Iremonger, Bernard
>> <bernard.iremonger@intel.com>; alialnu@mellanox.com
>> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
>> <pbhagavatula@marvell.com>
>> Subject: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
>> into a separate function
>>
>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Move the packet prepare logic into a separate function so that it can
>> be reused later.
>>
>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> ---
>>  app/test-pmd/txonly.c | 163
>> +++++++++++++++++++++---------------------
>>  1 file changed, 83 insertions(+), 80 deletions(-)
>>
>> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
>> 65171c1d1..56ca0ad24 100644
>> --- a/app/test-pmd/txonly.c
>> +++ b/app/test-pmd/txonly.c
>> @@ -148,6 +148,80 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
>>  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;  }
>>
>> +static inline bool
>> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
>> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
>> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
>> +	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
>> +	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
>> +	struct rte_mbuf *pkt_seg;
>> +	uint32_t nb_segs, pkt_len;
>> +	uint8_t i;
>> +
>> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
>> +		nb_segs = random() % tx_pkt_nb_segs + 1;
>> +	else
>> +		nb_segs = tx_pkt_nb_segs;
>> +
>> +	if (nb_segs > 1) {
>> +		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
>> +			return false;
>> +	}
>> +
>> +	rte_pktmbuf_reset_headroom(pkt);
>> +	pkt->data_len = tx_pkt_seg_lengths[0];
>> +	pkt->ol_flags = ol_flags;
>> +	pkt->vlan_tci = vlan_tci;
>> +	pkt->vlan_tci_outer = vlan_tci_outer;
>> +	pkt->l2_len = sizeof(struct ether_hdr);
>> +	pkt->l3_len = sizeof(struct ipv4_hdr);
>> +
>> +	pkt_len = pkt->data_len;
>> +	pkt_seg = pkt;
>> +	for (i = 1; i < nb_segs; i++) {
>> +		pkt_seg->next = pkt_segs[i - 1];
>> +		pkt_seg = pkt_seg->next;
>> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
>> +		pkt_len += pkt_seg->data_len;
>> +	}
>> +	pkt_seg->next = NULL; /* Last segment of packet. */
>> +	/*
>> +	 * Copy headers in first packet segment(s).
>> +	 */
>> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
>> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>> +			sizeof(struct ether_hdr));
>> +	if (txonly_multi_flow) {
>> +		struct ipv4_hdr *ip_hdr;
>> +		uint32_t addr;
>> +
>> +		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
>> +				struct ipv4_hdr *,
>> +				sizeof(struct ether_hdr));
>> +		/*
>> +		 * Generate multiple flows by varying IP src addr. This
>> +		 * enables packets are well distributed by RSS in
>> +		 * receiver side if any and txonly mode can be a decent
>> +		 * packet generator for developer's quick performance
>> +		 * regression test.
>> +		 */
>> +		addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
>> +		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
>> +	}
>> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
>> +			sizeof(struct ether_hdr) +
>> +			sizeof(struct ipv4_hdr));
>> +	/*
>> +	 * Complete first mbuf of packet and append it to the
>> +	 * burst of packets to be transmitted.
>> +	 */
>> +	pkt->nb_segs = nb_segs;
>> +	pkt->pkt_len = pkt_len;
>> +
>> +	return true;
>> +}
>> +
>>  /*
>>   * Transmit a burst of multi-segments packets.
>>   */
>> @@ -155,10 +229,8 @@ static void
>>  pkt_burst_transmit(struct fwd_stream *fs)  {
>>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>> -	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
>>  	struct rte_port *txp;
>>  	struct rte_mbuf *pkt;
>> -	struct rte_mbuf *pkt_seg;
>>  	struct rte_mempool *mbp;
>>  	struct ether_hdr eth_hdr;
>>  	uint16_t nb_tx;
>> @@ -166,15 +238,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
>>  	uint16_t vlan_tci, vlan_tci_outer;
>>  	uint32_t retry;
>>  	uint64_t ol_flags = 0;
>> -	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
>> -	uint8_t  i;
>>  	uint64_t tx_offloads;
>>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>>  	uint64_t start_tsc;
>>  	uint64_t end_tsc;
>>  	uint64_t core_cycles;
>>  #endif
>> -	uint32_t nb_segs, pkt_len;
>>
>>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>>  	start_tsc = rte_rdtsc();
>> @@ -201,85 +270,19 @@ pkt_burst_transmit(struct fwd_stream *fs)
>>
>>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
>>  		pkt = rte_mbuf_raw_alloc(mbp);
>> -		if (pkt == NULL) {
>> -		nomore_mbuf:
>> -			if (nb_pkt == 0)
>> -				return;
>> +		if (pkt == NULL)
>> +			break;
>> +		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
>> +						vlan_tci_outer, ol_flags))) {
>> +			rte_pktmbuf_free(pkt);
>>  			break;
>>  		}
>> -
>> -		/*
>> -		 * Using raw alloc is good to improve performance,
>> -		 * but some consumers may use the headroom and so
>> -		 * decrement data_off. We need to make sure it is
>> -		 * reset to default value.
>> -		 */
>> -		rte_pktmbuf_reset_headroom(pkt);
>> -		pkt->data_len = tx_pkt_seg_lengths[0];
>> -		pkt_seg = pkt;
>> -
>> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
>> -			nb_segs = random() % tx_pkt_nb_segs + 1;
>> -		else
>> -			nb_segs = tx_pkt_nb_segs;
>> -
>> -		if (nb_segs > 1) {
>> -			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
>> -							nb_segs)) {
>> -				rte_pktmbuf_free(pkt);
>> -				goto nomore_mbuf;
>> -			}
>> -		}
>> -
>> -		pkt_len = pkt->data_len;
>> -		for (i = 1; i < nb_segs; i++) {
>> -			pkt_seg->next = pkt_segs[i - 1];
>> -			pkt_seg = pkt_seg->next;
>> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
>> -			pkt_len += pkt_seg->data_len;
>> -		}
>> -		pkt_seg->next = NULL; /* Last segment of packet. */
>> -
>> -		/*
>> -		 * Copy headers in first packet segment(s).
>> -		 */
>> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
>> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>> -				sizeof(struct ether_hdr));
>> -		if (txonly_multi_flow) {
>> -			struct ipv4_hdr *ip_hdr;
>> -			uint32_t addr;
>> -
>> -			ip_hdr = rte_pktmbuf_mtod_offset(pkt,
>> -					struct ipv4_hdr *,
>> -					sizeof(struct ether_hdr));
>> -			/*
>> -			 * Generate multiple flows by varying IP src addr. This
>> -			 * enables packets are well distributed by RSS in
>> -			 * receiver side if any and txonly mode can be a
>> decent
>> -			 * packet generator for developer's quick
>> performance
>> -			 * regression test.
>> -			 */
>> -			addr = (IP_DST_ADDR | (ip_var++ << 8)) +
>> rte_lcore_id();
>> -			ip_hdr->src_addr = rte_cpu_to_be_32(addr);
>> -		}
>> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
>> -				sizeof(struct ether_hdr) +
>> -				sizeof(struct ipv4_hdr));
>> -
>> -		/*
>> -		 * Complete first mbuf of packet and append it to the
>> -		 * burst of packets to be transmitted.
>> -		 */
>> -		pkt->nb_segs = nb_segs;
>> -		pkt->pkt_len = pkt_len;
>> -		pkt->ol_flags = ol_flags;
>> -		pkt->vlan_tci = vlan_tci;
>> -		pkt->vlan_tci_outer = vlan_tci_outer;
>> -		pkt->l2_len = sizeof(struct ether_hdr);
>> -		pkt->l3_len = sizeof(struct ipv4_hdr);
>>  		pkts_burst[nb_pkt] = pkt;
>>  	}
>> +
>> +	if (nb_pkt == 0)
>> +		return;
>> +
>>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
>> nb_pkt);
>>  	/*
>>  	 * Retry if necessary
>> --
>> 2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-09  9:32       ` Pavan Nikhilesh Bhagavatula
@ 2019-04-09  9:32         ` Pavan Nikhilesh Bhagavatula
  2019-04-09 12:24         ` Yao, Lei A
  1 sibling, 0 replies; 68+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2019-04-09  9:32 UTC (permalink / raw)
  To: Lin, Xueqin, Yigit, Ferruh
  Cc: dev, Xu, Qian Q, Li, WenjieX A, Wang, FengqinX, Yao, Lei A, Wang,
	Yinan, Jerin Jacob Kollanukkaran, thomas, arybchenko, Iremonger,
	Bernard, alialnu, Zhang, Qi Z

Hi Lin,

Can you check if the following patch fixes the issue?
http://patches.dpdk.org/patch/52395/

I wasn't able to catch this earlier.
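
For context, the suspicion is that pkt_burst_prepare() now receives eth_hdr as
a pointer, so the copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0) call in the
hunk quoted below copies only pointer-size bytes (8 on 64-bit) instead of the
14-byte Ethernet header. A standalone sketch of that pitfall (illustrative
only; plain C with a stand-in struct, not the testpmd code itself):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Stand-in for struct ether_hdr: 6-byte dst, 6-byte src, 2-byte type. */
struct eth {
	uint8_t dst[6];
	uint8_t src[6];
	uint16_t type;
};

/* Models copy_buf_to_pkt(): copy 'len' header bytes into the frame. */
static void copy_hdr(const void *buf, unsigned int len, uint8_t *frame)
{
	memcpy(frame, buf, len);
}

int main(void)
{
	struct eth hdr = { {1, 1, 1, 1, 1, 1}, {2, 2, 2, 2, 2, 2}, 0x0800 };
	struct eth *eth_hdr = &hdr;          /* pointer parameter, as in v6 */
	uint8_t frame[sizeof(struct eth)];
	unsigned int i;

	memset(frame, 0xee, sizeof(frame));  /* stale mbuf contents */

	/* Bug: sizeof(eth_hdr) is the pointer size (8 on 64-bit targets),
	 * so only the dst MAC plus 2 bytes of the src MAC get copied. */
	copy_hdr(eth_hdr, sizeof(eth_hdr), frame);
	/* Intended: copy_hdr(eth_hdr, sizeof(*eth_hdr), frame); */

	printf("src bytes on the wire:");
	for (i = 6; i < 12; i++)
		printf(" %02x", frame[i]);
	printf("\n");   /* 02 02 ee ee ee ee with the buggy call on 64-bit */
	return 0;
}

If that is indeed the root cause, passing the structure size (sizeof(*eth_hdr)
or sizeof(struct ether_hdr)) to copy_buf_to_pkt() restores the full header,
which would match the corrupted source MAC observed later in this thread.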

Regards,
Pavan.

>-----Original Message-----
>From: Lin, Xueqin <xueqin.lin@intel.com>
>Sent: Tuesday, April 9, 2019 2:58 PM
>To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>; Yigit, Ferruh
><ferruh.yigit@intel.com>
>Cc: dev@dpdk.org; Xu, Qian Q <qian.q.xu@intel.com>; Li, WenjieX A
><wenjiex.a.li@intel.com>; Wang, FengqinX <fengqinx.wang@intel.com>; Yao,
>Lei A <lei.a.yao@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jerin Jacob
>Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
>arybchenko@solarflare.com; Iremonger, Bernard
><bernard.iremonger@intel.com>; alialnu@mellanox.com; Zhang, Qi Z
><qi.z.zhang@intel.com>
>Subject: [EXT] RE: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare
>logic into a separate function
>
>External Email
>
>----------------------------------------------------------------------
>Hi Nikhilesh,
>
>This patchset impacts some of the 19.05-rc1 txonly/burst tests on Intel NICs.
>If txonly forwarding is set, the IXIA or tester peer cannot receive the packets
>generated by the application.
>This is a high-priority issue and blocks several test cases. Detailed
>information is below; please check it soon.
>
>*DPDK version:  19.05.0-rc1
>*NIC hardware: Fortville_eagle/Fortville_spirit/Niantic
>Environment: one NIC port connected to another NIC port, or one NIC port
>connected to IXIA
>
>Test Setup
>1. Bind the port to igb_uio or vfio.
>2. On the DUT, launch testpmd:
>    ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x1e -n 4 -- -i --rxq=4 --txq=4 --port-topology=loop
>3. Set txonly forwarding and start it:
>    testpmd>set fwd txonly
>    testpmd>start
>4. Dump packets from the tester NIC port or IXIA; no packets are received on PORT0:
>    tcpdump -i <tester_interface> -v
>
>Best regards,
>Xueqin
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
>> Bhagavatula
>> Sent: Tuesday, April 2, 2019 5:54 PM
>> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
>> thomas@monjalon.net; arybchenko@solarflare.com; Yigit, Ferruh
>> <ferruh.yigit@intel.com>; Iremonger, Bernard
>> <bernard.iremonger@intel.com>; alialnu@mellanox.com
>> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
>> <pbhagavatula@marvell.com>
>> Subject: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
>> into a separate function
>>
>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Move the packet prepare logic into a separate function so that it can
>> be reused later.
>>
>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> ---
>>  app/test-pmd/txonly.c | 163
>> +++++++++++++++++++++---------------------
>>  1 file changed, 83 insertions(+), 80 deletions(-)
>>
>> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
>> 65171c1d1..56ca0ad24 100644
>> --- a/app/test-pmd/txonly.c
>> +++ b/app/test-pmd/txonly.c
>> @@ -148,6 +148,80 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
>>  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;  }
>>
>> +static inline bool
>> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
>> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
>> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
>> +	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
>> +	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
>> +	struct rte_mbuf *pkt_seg;
>> +	uint32_t nb_segs, pkt_len;
>> +	uint8_t i;
>> +
>> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
>> +		nb_segs = random() % tx_pkt_nb_segs + 1;
>> +	else
>> +		nb_segs = tx_pkt_nb_segs;
>> +
>> +	if (nb_segs > 1) {
>> +		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
>> +			return false;
>> +	}
>> +
>> +	rte_pktmbuf_reset_headroom(pkt);
>> +	pkt->data_len = tx_pkt_seg_lengths[0];
>> +	pkt->ol_flags = ol_flags;
>> +	pkt->vlan_tci = vlan_tci;
>> +	pkt->vlan_tci_outer = vlan_tci_outer;
>> +	pkt->l2_len = sizeof(struct ether_hdr);
>> +	pkt->l3_len = sizeof(struct ipv4_hdr);
>> +
>> +	pkt_len = pkt->data_len;
>> +	pkt_seg = pkt;
>> +	for (i = 1; i < nb_segs; i++) {
>> +		pkt_seg->next = pkt_segs[i - 1];
>> +		pkt_seg = pkt_seg->next;
>> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
>> +		pkt_len += pkt_seg->data_len;
>> +	}
>> +	pkt_seg->next = NULL; /* Last segment of packet. */
>> +	/*
>> +	 * Copy headers in first packet segment(s).
>> +	 */
>> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
>> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>> +			sizeof(struct ether_hdr));
>> +	if (txonly_multi_flow) {
>> +		struct ipv4_hdr *ip_hdr;
>> +		uint32_t addr;
>> +
>> +		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
>> +				struct ipv4_hdr *,
>> +				sizeof(struct ether_hdr));
>> +		/*
>> +		 * Generate multiple flows by varying IP src addr. This
>> +		 * enables packets are well distributed by RSS in
>> +		 * receiver side if any and txonly mode can be a decent
>> +		 * packet generator for developer's quick performance
>> +		 * regression test.
>> +		 */
>> +		addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
>> +		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
>> +	}
>> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
>> +			sizeof(struct ether_hdr) +
>> +			sizeof(struct ipv4_hdr));
>> +	/*
>> +	 * Complete first mbuf of packet and append it to the
>> +	 * burst of packets to be transmitted.
>> +	 */
>> +	pkt->nb_segs = nb_segs;
>> +	pkt->pkt_len = pkt_len;
>> +
>> +	return true;
>> +}
>> +
>>  /*
>>   * Transmit a burst of multi-segments packets.
>>   */
>> @@ -155,10 +229,8 @@ static void
>>  pkt_burst_transmit(struct fwd_stream *fs)  {
>>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>> -	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
>>  	struct rte_port *txp;
>>  	struct rte_mbuf *pkt;
>> -	struct rte_mbuf *pkt_seg;
>>  	struct rte_mempool *mbp;
>>  	struct ether_hdr eth_hdr;
>>  	uint16_t nb_tx;
>> @@ -166,15 +238,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
>>  	uint16_t vlan_tci, vlan_tci_outer;
>>  	uint32_t retry;
>>  	uint64_t ol_flags = 0;
>> -	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
>> -	uint8_t  i;
>>  	uint64_t tx_offloads;
>>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>>  	uint64_t start_tsc;
>>  	uint64_t end_tsc;
>>  	uint64_t core_cycles;
>>  #endif
>> -	uint32_t nb_segs, pkt_len;
>>
>>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>>  	start_tsc = rte_rdtsc();
>> @@ -201,85 +270,19 @@ pkt_burst_transmit(struct fwd_stream *fs)
>>
>>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
>>  		pkt = rte_mbuf_raw_alloc(mbp);
>> -		if (pkt == NULL) {
>> -		nomore_mbuf:
>> -			if (nb_pkt == 0)
>> -				return;
>> +		if (pkt == NULL)
>> +			break;
>> +		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
>> +						vlan_tci_outer, ol_flags))) {
>> +			rte_pktmbuf_free(pkt);
>>  			break;
>>  		}
>> -
>> -		/*
>> -		 * Using raw alloc is good to improve performance,
>> -		 * but some consumers may use the headroom and so
>> -		 * decrement data_off. We need to make sure it is
>> -		 * reset to default value.
>> -		 */
>> -		rte_pktmbuf_reset_headroom(pkt);
>> -		pkt->data_len = tx_pkt_seg_lengths[0];
>> -		pkt_seg = pkt;
>> -
>> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
>> -			nb_segs = random() % tx_pkt_nb_segs + 1;
>> -		else
>> -			nb_segs = tx_pkt_nb_segs;
>> -
>> -		if (nb_segs > 1) {
>> -			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
>> -							nb_segs)) {
>> -				rte_pktmbuf_free(pkt);
>> -				goto nomore_mbuf;
>> -			}
>> -		}
>> -
>> -		pkt_len = pkt->data_len;
>> -		for (i = 1; i < nb_segs; i++) {
>> -			pkt_seg->next = pkt_segs[i - 1];
>> -			pkt_seg = pkt_seg->next;
>> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
>> -			pkt_len += pkt_seg->data_len;
>> -		}
>> -		pkt_seg->next = NULL; /* Last segment of packet. */
>> -
>> -		/*
>> -		 * Copy headers in first packet segment(s).
>> -		 */
>> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
>> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>> -				sizeof(struct ether_hdr));
>> -		if (txonly_multi_flow) {
>> -			struct ipv4_hdr *ip_hdr;
>> -			uint32_t addr;
>> -
>> -			ip_hdr = rte_pktmbuf_mtod_offset(pkt,
>> -					struct ipv4_hdr *,
>> -					sizeof(struct ether_hdr));
>> -			/*
>> -			 * Generate multiple flows by varying IP src addr. This
>> -			 * enables packets are well distributed by RSS in
>> -			 * receiver side if any and txonly mode can be a
>> decent
>> -			 * packet generator for developer's quick
>> performance
>> -			 * regression test.
>> -			 */
>> -			addr = (IP_DST_ADDR | (ip_var++ << 8)) +
>> rte_lcore_id();
>> -			ip_hdr->src_addr = rte_cpu_to_be_32(addr);
>> -		}
>> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
>> -				sizeof(struct ether_hdr) +
>> -				sizeof(struct ipv4_hdr));
>> -
>> -		/*
>> -		 * Complete first mbuf of packet and append it to the
>> -		 * burst of packets to be transmitted.
>> -		 */
>> -		pkt->nb_segs = nb_segs;
>> -		pkt->pkt_len = pkt_len;
>> -		pkt->ol_flags = ol_flags;
>> -		pkt->vlan_tci = vlan_tci;
>> -		pkt->vlan_tci_outer = vlan_tci_outer;
>> -		pkt->l2_len = sizeof(struct ether_hdr);
>> -		pkt->l3_len = sizeof(struct ipv4_hdr);
>>  		pkts_burst[nb_pkt] = pkt;
>>  	}
>> +
>> +	if (nb_pkt == 0)
>> +		return;
>> +
>>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
>> nb_pkt);
>>  	/*
>>  	 * Retry if necessary
>> --
>> 2.21.0


^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-09  9:32       ` Pavan Nikhilesh Bhagavatula
  2019-04-09  9:32         ` Pavan Nikhilesh Bhagavatula
@ 2019-04-09 12:24         ` Yao, Lei A
  2019-04-09 12:24           ` Yao, Lei A
  2019-04-09 12:29           ` Ferruh Yigit
  1 sibling, 2 replies; 68+ messages in thread
From: Yao, Lei A @ 2019-04-09 12:24 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Lin, Xueqin, Yigit, Ferruh
  Cc: dev, Xu, Qian Q, Li, WenjieX A, Wang, FengqinX, Wang, Yinan,
	Jerin Jacob Kollanukkaran, thomas, arybchenko, Iremonger,
	Bernard, alialnu, Zhang, Qi Z



> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula [mailto:pbhagavatula@marvell.com]
> Sent: Tuesday, April 9, 2019 5:33 PM
> To: Lin, Xueqin <xueqin.lin@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; Xu, Qian Q <qian.q.xu@intel.com>; Li, WenjieX A
> <wenjiex.a.li@intel.com>; Wang, FengqinX <fengqinx.wang@intel.com>;
> Yao, Lei A <lei.a.yao@intel.com>; Wang, Yinan <yinan.wang@intel.com>;
> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
> arybchenko@solarflare.com; Iremonger, Bernard
> <bernard.iremonger@intel.com>; alialnu@mellanox.com; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
> into a separate function
> 
> Hi Lin,
> 
> Can you check if the following patch fixes the issue?
> http://patches.dpdk.org/patch/52395/
> 
> I wasn't able to catch this earlier.
> 
> Regards,
> Pavan

Hi Pavan,

With this patch, testpmd now generates packets with the correct source
MAC address on my side. Thanks.
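
(For anyone re-checking this, comparing the capture on the peer against the
DUT port's MAC makes it easy to confirm; the interface and port number below
are placeholders:

    tester$  tcpdump -e -c 4 -i <tester_interface>
    testpmd> show port info 0

The "MAC address:" line reported by testpmd should match the source address
shown for every captured frame.)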

BRs
Lei


> 
> >-----Original Message-----
> >From: Lin, Xueqin <xueqin.lin@intel.com>
> >Sent: Tuesday, April 9, 2019 2:58 PM
> >To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>; Yigit,
> Ferruh
> ><ferruh.yigit@intel.com>
> >Cc: dev@dpdk.org; Xu, Qian Q <qian.q.xu@intel.com>; Li, WenjieX A
> ><wenjiex.a.li@intel.com>; Wang, FengqinX <fengqinx.wang@intel.com>;
> Yao,
> >Lei A <lei.a.yao@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Jerin
> Jacob
> >Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
> >arybchenko@solarflare.com; Iremonger, Bernard
> ><bernard.iremonger@intel.com>; alialnu@mellanox.com; Zhang, Qi Z
> ><qi.z.zhang@intel.com>
> >Subject: [EXT] RE: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt
> prepare
> >logic into a separate function
> >
> >External Email
> >
> >----------------------------------------------------------------------
> >Hi Nikhilesh,
> >
> >This patchset impacts some of the 19.05-rc1 txonly/burst tests on Intel NICs.
> >If txonly forwarding is set, the IXIA or tester peer cannot receive the
> >packets generated by the application.
> >This is a high-priority issue and blocks several test cases. Detailed
> >information is below; please check it soon.
> >
> >*DPDK version:  19.05.0-rc1
> >*NIC hardware: Fortville_eagle/Fortville_spirit/Niantic
> >Environment: one NIC port connected to another NIC port, or one NIC port
> >connected to IXIA
> >
> >Test Setup
> >1. Bind the port to igb_uio or vfio.
> >2. On the DUT, launch testpmd:
> >    ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x1e -n 4 -- -i --rxq=4 --txq=4 --port-topology=loop
> >3. Set txonly forwarding and start it:
> >    testpmd>set fwd txonly
> >    testpmd>start
> >4. Dump packets from the tester NIC port or IXIA; no packets are received on PORT0:
> >    tcpdump -i <tester_interface> -v
> >
> >Best regards,
> >Xueqin
> >
> >> -----Original Message-----
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> >> Bhagavatula
> >> Sent: Tuesday, April 2, 2019 5:54 PM
> >> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> >> thomas@monjalon.net; arybchenko@solarflare.com; Yigit, Ferruh
> >> <ferruh.yigit@intel.com>; Iremonger, Bernard
> >> <bernard.iremonger@intel.com>; alialnu@mellanox.com
> >> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> >> <pbhagavatula@marvell.com>
> >> Subject: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
> >> into a separate function
> >>
> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >>
> >> Move the packet prepare logic into a separate function so that it can
> >> be reused later.
> >>
> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> ---
> >>  app/test-pmd/txonly.c | 163
> >> +++++++++++++++++++++---------------------
> >>  1 file changed, 83 insertions(+), 80 deletions(-)
> >>
> >> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> >> 65171c1d1..56ca0ad24 100644
> >> --- a/app/test-pmd/txonly.c
> >> +++ b/app/test-pmd/txonly.c
> >> @@ -148,6 +148,80 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr
> *ip_hdr,
> >>  	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;  }
> >>
> >> +static inline bool
> >> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> >> +		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> >> +		const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
> >> +	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
> >> +	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> >> +	struct rte_mbuf *pkt_seg;
> >> +	uint32_t nb_segs, pkt_len;
> >> +	uint8_t i;
> >> +
> >> +	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> >> +		nb_segs = random() % tx_pkt_nb_segs + 1;
> >> +	else
> >> +		nb_segs = tx_pkt_nb_segs;
> >> +
> >> +	if (nb_segs > 1) {
> >> +		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
> nb_segs))
> >> +			return false;
> >> +	}
> >> +
> >> +	rte_pktmbuf_reset_headroom(pkt);
> >> +	pkt->data_len = tx_pkt_seg_lengths[0];
> >> +	pkt->ol_flags = ol_flags;
> >> +	pkt->vlan_tci = vlan_tci;
> >> +	pkt->vlan_tci_outer = vlan_tci_outer;
> >> +	pkt->l2_len = sizeof(struct ether_hdr);
> >> +	pkt->l3_len = sizeof(struct ipv4_hdr);
> >> +
> >> +	pkt_len = pkt->data_len;
> >> +	pkt_seg = pkt;
> >> +	for (i = 1; i < nb_segs; i++) {
> >> +		pkt_seg->next = pkt_segs[i - 1];
> >> +		pkt_seg = pkt_seg->next;
> >> +		pkt_seg->data_len = tx_pkt_seg_lengths[i];
> >> +		pkt_len += pkt_seg->data_len;
> >> +	}
> >> +	pkt_seg->next = NULL; /* Last segment of packet. */
> >> +	/*
> >> +	 * Copy headers in first packet segment(s).
> >> +	 */
> >> +	copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> >> +	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> >> +			sizeof(struct ether_hdr));
> >> +	if (txonly_multi_flow) {
> >> +		struct ipv4_hdr *ip_hdr;
> >> +		uint32_t addr;
> >> +
> >> +		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> >> +				struct ipv4_hdr *,
> >> +				sizeof(struct ether_hdr));
> >> +		/*
> >> +		 * Generate multiple flows by varying IP src addr. This
> >> +		 * enables packets are well distributed by RSS in
> >> +		 * receiver side if any and txonly mode can be a decent
> >> +		 * packet generator for developer's quick performance
> >> +		 * regression test.
> >> +		 */
> >> +		addr = (IP_DST_ADDR | (ip_var++ << 8)) + rte_lcore_id();
> >> +		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> >> +	}
> >> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> >> +			sizeof(struct ether_hdr) +
> >> +			sizeof(struct ipv4_hdr));
> >> +	/*
> >> +	 * Complete first mbuf of packet and append it to the
> >> +	 * burst of packets to be transmitted.
> >> +	 */
> >> +	pkt->nb_segs = nb_segs;
> >> +	pkt->pkt_len = pkt_len;
> >> +
> >> +	return true;
> >> +}
> >> +
> >>  /*
> >>   * Transmit a burst of multi-segments packets.
> >>   */
> >> @@ -155,10 +229,8 @@ static void
> >>  pkt_burst_transmit(struct fwd_stream *fs)  {
> >>  	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> >> -	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
> >>  	struct rte_port *txp;
> >>  	struct rte_mbuf *pkt;
> >> -	struct rte_mbuf *pkt_seg;
> >>  	struct rte_mempool *mbp;
> >>  	struct ether_hdr eth_hdr;
> >>  	uint16_t nb_tx;
> >> @@ -166,15 +238,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
> >>  	uint16_t vlan_tci, vlan_tci_outer;
> >>  	uint32_t retry;
> >>  	uint64_t ol_flags = 0;
> >> -	uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> >> -	uint8_t  i;
> >>  	uint64_t tx_offloads;
> >>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> >>  	uint64_t start_tsc;
> >>  	uint64_t end_tsc;
> >>  	uint64_t core_cycles;
> >>  #endif
> >> -	uint32_t nb_segs, pkt_len;
> >>
> >>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> >>  	start_tsc = rte_rdtsc();
> >> @@ -201,85 +270,19 @@ pkt_burst_transmit(struct fwd_stream *fs)
> >>
> >>  	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> >>  		pkt = rte_mbuf_raw_alloc(mbp);
> >> -		if (pkt == NULL) {
> >> -		nomore_mbuf:
> >> -			if (nb_pkt == 0)
> >> -				return;
> >> +		if (pkt == NULL)
> >> +			break;
> >> +		if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr, vlan_tci,
> >> +						vlan_tci_outer, ol_flags))) {
> >> +			rte_pktmbuf_free(pkt);
> >>  			break;
> >>  		}
> >> -
> >> -		/*
> >> -		 * Using raw alloc is good to improve performance,
> >> -		 * but some consumers may use the headroom and so
> >> -		 * decrement data_off. We need to make sure it is
> >> -		 * reset to default value.
> >> -		 */
> >> -		rte_pktmbuf_reset_headroom(pkt);
> >> -		pkt->data_len = tx_pkt_seg_lengths[0];
> >> -		pkt_seg = pkt;
> >> -
> >> -		if (tx_pkt_split == TX_PKT_SPLIT_RND)
> >> -			nb_segs = random() % tx_pkt_nb_segs + 1;
> >> -		else
> >> -			nb_segs = tx_pkt_nb_segs;
> >> -
> >> -		if (nb_segs > 1) {
> >> -			if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
> >> -							nb_segs)) {
> >> -				rte_pktmbuf_free(pkt);
> >> -				goto nomore_mbuf;
> >> -			}
> >> -		}
> >> -
> >> -		pkt_len = pkt->data_len;
> >> -		for (i = 1; i < nb_segs; i++) {
> >> -			pkt_seg->next = pkt_segs[i - 1];
> >> -			pkt_seg = pkt_seg->next;
> >> -			pkt_seg->data_len = tx_pkt_seg_lengths[i];
> >> -			pkt_len += pkt_seg->data_len;
> >> -		}
> >> -		pkt_seg->next = NULL; /* Last segment of packet. */
> >> -
> >> -		/*
> >> -		 * Copy headers in first packet segment(s).
> >> -		 */
> >> -		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> >> -		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> >> -				sizeof(struct ether_hdr));
> >> -		if (txonly_multi_flow) {
> >> -			struct ipv4_hdr *ip_hdr;
> >> -			uint32_t addr;
> >> -
> >> -			ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> >> -					struct ipv4_hdr *,
> >> -					sizeof(struct ether_hdr));
> >> -			/*
> >> -			 * Generate multiple flows by varying IP src addr. This
> >> -			 * enables packets are well distributed by RSS in
> >> -			 * receiver side if any and txonly mode can be a
> >> decent
> >> -			 * packet generator for developer's quick
> >> performance
> >> -			 * regression test.
> >> -			 */
> >> -			addr = (IP_DST_ADDR | (ip_var++ << 8)) +
> >> rte_lcore_id();
> >> -			ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> >> -		}
> >> -		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> >> -				sizeof(struct ether_hdr) +
> >> -				sizeof(struct ipv4_hdr));
> >> -
> >> -		/*
> >> -		 * Complete first mbuf of packet and append it to the
> >> -		 * burst of packets to be transmitted.
> >> -		 */
> >> -		pkt->nb_segs = nb_segs;
> >> -		pkt->pkt_len = pkt_len;
> >> -		pkt->ol_flags = ol_flags;
> >> -		pkt->vlan_tci = vlan_tci;
> >> -		pkt->vlan_tci_outer = vlan_tci_outer;
> >> -		pkt->l2_len = sizeof(struct ether_hdr);
> >> -		pkt->l3_len = sizeof(struct ipv4_hdr);
> >>  		pkts_burst[nb_pkt] = pkt;
> >>  	}
> >> +
> >> +	if (nb_pkt == 0)
> >> +		return;
> >> +
> >>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> >> nb_pkt);
> >>  	/*
> >>  	 * Retry if necessary
> >> --
> >> 2.21.0

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function
  2019-04-09 12:24         ` Yao, Lei A
  2019-04-09 12:24           ` Yao, Lei A
@ 2019-04-09 12:29           ` Ferruh Yigit
  2019-04-09 12:29             ` Ferruh Yigit
  1 sibling, 1 reply; 68+ messages in thread
From: Ferruh Yigit @ 2019-04-09 12:29 UTC (permalink / raw)
  To: Yao, Lei A, Pavan Nikhilesh Bhagavatula, Lin, Xueqin
  Cc: dev, Xu, Qian Q, Li, WenjieX A, Wang, FengqinX, Wang, Yinan,
	Jerin Jacob Kollanukkaran, thomas, arybchenko, Iremonger,
	Bernard, alialnu, Zhang, Qi Z

On 4/9/2019 1:24 PM, Yao, Lei A wrote:
> 
> 
>> -----Original Message-----
>> From: Pavan Nikhilesh Bhagavatula [mailto:pbhagavatula@marvell.com]
>> Sent: Tuesday, April 9, 2019 5:33 PM
>> To: Lin, Xueqin <xueqin.lin@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>
>> Cc: dev@dpdk.org; Xu, Qian Q <qian.q.xu@intel.com>; Li, WenjieX A
>> <wenjiex.a.li@intel.com>; Wang, FengqinX <fengqinx.wang@intel.com>;
>> Yao, Lei A <lei.a.yao@intel.com>; Wang, Yinan <yinan.wang@intel.com>;
>> Jerin Jacob Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
>> arybchenko@solarflare.com; Iremonger, Bernard
>> <bernard.iremonger@intel.com>; alialnu@mellanox.com; Zhang, Qi Z
>> <qi.z.zhang@intel.com>
>> Subject: RE: [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic
>> into a separate function
>>
>> Hi Lin,
>>
>> Can you check if the following patch fixes the issue?
>> http://patches.dpdk.org/patch/52395/
>>
>> I wasn't able to catch this earlier.
>>
>> Regards,
>> Pavan
> 
> Hi Pavan,
> 
> With this patch, testpmd now generates packets with the correct source
> MAC address on my side. Thanks.

There is a new version of that patch [1], which I will check and pick up now.

[1]
https://patches.dpdk.org/patch/52461/

^ permalink raw reply	[flat|nested] 68+ messages in thread

end of thread, other threads:[~2019-04-09 12:30 UTC | newest]

Thread overview: 68+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-28 19:42 [dpdk-dev] [PATCH] app/testpmd: use mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
2019-03-01  7:38 ` Andrew Rybchenko
2019-03-01  8:45   ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
2019-03-01 13:47 ` [dpdk-dev] [PATCH v2] app/testpmd: add " Pavan Nikhilesh Bhagavatula
2019-03-19 16:48   ` Ferruh Yigit
2019-03-19 16:48     ` Ferruh Yigit
2019-03-20  4:53     ` Pavan Nikhilesh Bhagavatula
2019-03-20  4:53       ` Pavan Nikhilesh Bhagavatula
2019-03-26  8:43       ` Ferruh Yigit
2019-03-26  8:43         ` Ferruh Yigit
2019-03-26 11:00   ` Thomas Monjalon
2019-03-26 11:00     ` Thomas Monjalon
2019-03-26 11:50     ` Iremonger, Bernard
2019-03-26 11:50       ` Iremonger, Bernard
2019-03-26 12:06       ` Pavan Nikhilesh Bhagavatula
2019-03-26 12:06         ` Pavan Nikhilesh Bhagavatula
2019-03-26 13:16         ` Jerin Jacob Kollanukkaran
2019-03-26 13:16           ` Jerin Jacob Kollanukkaran
2019-03-26 12:26 ` [dpdk-dev] [PATCH v3 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
2019-03-26 12:26   ` Pavan Nikhilesh Bhagavatula
2019-03-26 12:27   ` [dpdk-dev] [PATCH v3 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
2019-03-26 12:27     ` Pavan Nikhilesh Bhagavatula
2019-03-26 13:03 ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Pavan Nikhilesh Bhagavatula
2019-03-26 13:03   ` Pavan Nikhilesh Bhagavatula
2019-03-26 13:03   ` [dpdk-dev] [PATCH v4 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
2019-03-26 13:03     ` Pavan Nikhilesh Bhagavatula
2019-03-26 16:13   ` [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd " Iremonger, Bernard
2019-03-26 16:13     ` Iremonger, Bernard
2019-03-31 13:14 ` [dpdk-dev] [PATCH v5 " Pavan Nikhilesh Bhagavatula
2019-03-31 13:14   ` Pavan Nikhilesh Bhagavatula
2019-03-31 13:14   ` [dpdk-dev] [PATCH v5 2/2] app/testpmd: add mempool bulk get for " Pavan Nikhilesh Bhagavatula
2019-03-31 13:14     ` Pavan Nikhilesh Bhagavatula
2019-04-01 20:25   ` [dpdk-dev] [PATCH v5 1/2] app/testpmd: optimize testpmd " Ferruh Yigit
2019-04-01 20:25     ` Ferruh Yigit
2019-04-01 20:53     ` Thomas Monjalon
2019-04-01 20:53       ` Thomas Monjalon
2019-04-02  1:03       ` Jerin Jacob Kollanukkaran
2019-04-02  1:03         ` Jerin Jacob Kollanukkaran
2019-04-02  7:06         ` Thomas Monjalon
2019-04-02  7:06           ` Thomas Monjalon
2019-04-02  8:31           ` Jerin Jacob Kollanukkaran
2019-04-02  8:31             ` Jerin Jacob Kollanukkaran
2019-04-02  9:03   ` Ali Alnubani
2019-04-02  9:03     ` Ali Alnubani
2019-04-02  9:06     ` Pavan Nikhilesh Bhagavatula
2019-04-02  9:06       ` Pavan Nikhilesh Bhagavatula
2019-04-02  9:53 ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Pavan Nikhilesh Bhagavatula
2019-04-02  9:53   ` Pavan Nikhilesh Bhagavatula
2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 2/4] app/testpmd: use bulk ops for allocating segments Pavan Nikhilesh Bhagavatula
2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 3/4] app/testpmd: move pkt prepare logic into a separate function Pavan Nikhilesh Bhagavatula
2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
2019-04-09  9:28     ` Lin, Xueqin
2019-04-09  9:28       ` Lin, Xueqin
2019-04-09  9:32       ` Pavan Nikhilesh Bhagavatula
2019-04-09  9:32         ` Pavan Nikhilesh Bhagavatula
2019-04-09 12:24         ` Yao, Lei A
2019-04-09 12:24           ` Yao, Lei A
2019-04-09 12:29           ` Ferruh Yigit
2019-04-09 12:29             ` Ferruh Yigit
2019-04-02  9:53   ` [dpdk-dev] [PATCH v6 4/4] app/testpmd: add mempool bulk get for txonly mode Pavan Nikhilesh Bhagavatula
2019-04-02  9:53     ` Pavan Nikhilesh Bhagavatula
2019-04-02 15:21   ` [dpdk-dev] [PATCH v6 1/4] app/testpmd: move eth header generation outside the loop Raslan Darawsheh
2019-04-02 15:21     ` Raslan Darawsheh
2019-04-04 16:23   ` Ferruh Yigit
2019-04-04 16:23     ` Ferruh Yigit
2019-04-05 17:37   ` Ferruh Yigit
2019-04-05 17:37     ` Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).