* [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
@ 2020-11-04  7:28 Leyi Rong
  2020-11-04  8:14 ` David Marchand
  0 siblings, 1 reply; 13+ messages in thread
From: Leyi Rong @ 2020-11-04  7:28 UTC (permalink / raw)
  To: david.marchand, qi.z.zhang; +Cc: dev, Leyi Rong
Currently, l3fwd doesn't support multiple Tx queues, while
multiple Rx queues are supported.
To improve the throughput performance when polling multiple
queues, this patch enables multiple Tx queues handling on a lcore.
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 examples/l3fwd/l3fwd_common.h        |  6 ++---
 examples/l3fwd/l3fwd_em.c            |  2 +-
 examples/l3fwd/l3fwd_em_hlm.h        |  4 ++--
 examples/l3fwd/l3fwd_em_sequential.h |  5 ++--
 examples/l3fwd/l3fwd_lpm.c           |  2 +-
 examples/l3fwd/l3fwd_lpm_sse.h       |  5 ++--
 examples/l3fwd/l3fwd_sse.h           |  4 ++--
 examples/l3fwd/main.c                | 35 ++++++++++++++++------------
 8 files changed, 35 insertions(+), 28 deletions(-)
diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
index 7d83ff641a..ab114af8c6 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -178,8 +178,8 @@ static const struct {
 };
 
 static __rte_always_inline void
-send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
-		uint32_t num)
+send_packetsx4(struct lcore_conf *qconf, uint16_t port, uint16_t queueid,
+	       struct rte_mbuf *m[], uint32_t num)
 {
 	uint32_t len, j, n;
 
@@ -190,7 +190,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
 	 * then send them straightway.
 	 */
 	if (num >= MAX_TX_BURST && len == 0) {
-		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
+		n = rte_eth_tx_burst(port, queueid, m, num);
 		if (unlikely(n < num)) {
 			do {
 				rte_pktmbuf_free(m[n]);
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 9996bfba34..8fddb8d55d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -686,7 +686,7 @@ em_main_loop(__rte_unused void *dummy)
 
 #if defined RTE_ARCH_X86 || defined __ARM_NEON
 			l3fwd_em_send_packets(nb_rx, pkts_burst,
-							portid, qconf);
+							portid, queueid, qconf);
 #else
 			l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 278707c18c..d08f393eed 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -183,7 +183,7 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+		uint16_t portid, uint16_t queueid, struct lcore_conf *qconf)
 {
 	int32_t i, j, pos;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -238,7 +238,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 	for (; j < nb_rx; j++)
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 
 }
 
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index 6170052cf8..2d7071b0c9 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -74,7 +74,8 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		      uint16_t portid, uint16_t queueid,
+		      struct lcore_conf *qconf)
 {
 	int32_t i, j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -93,7 +94,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 /*
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 3dcf1fef18..8153150c37 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -243,7 +243,7 @@ lpm_main_loop(__rte_unused void *dummy)
 #if defined RTE_ARCH_X86 || defined __ARM_NEON \
 			 || defined RTE_ARCH_PPC_64
 			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
-						portid, qconf);
+						portid, queueid, qconf);
 #else
 			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..cd68179b76 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -83,7 +83,8 @@ processx4_step2(const struct lcore_conf *qconf,
  */
 static inline void
 l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		       uint16_t portid, uint16_t queueid,
+		       struct lcore_conf *qconf)
 {
 	int32_t j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -114,7 +115,7 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		j++;
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 #endif /* __L3FWD_LPM_SSE_H__ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index bb565ed546..f91580a4ce 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -125,7 +125,7 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
  */
 static __rte_always_inline void
 send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
-		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+		uint16_t dst_port[MAX_PKT_BURST], uint16_t queueid, int nb_rx)
 {
 	int32_t k;
 	int j = 0;
@@ -220,7 +220,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 		k = pnum[j];
 
 		if (likely(pn != BAD_PORT))
-			send_packetsx4(qconf, pn, pkts_burst + j, k);
+			send_packetsx4(qconf, pn, queueid, pkts_burst + j, k);
 		else
 			for (m = j; m != j + k; m++)
 				rte_pktmbuf_free(pkts_burst[m]);
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index d62dec434c..93922e7d48 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -935,7 +935,7 @@ l3fwd_poll_resource_setup(void)
 		fflush(stdout);
 
 		nb_rx_queue = get_port_n_rx_queues(portid);
-		n_tx_queue = nb_lcores;
+		n_tx_queue = nb_rx_queue;
 		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
 			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
 		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
@@ -1006,11 +1006,12 @@ l3fwd_poll_resource_setup(void)
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE, "init_mem failed\n");
 
-		/* init one TX queue per couple (lcore,port) */
+		/* init TX queues per couple (lcore,port) */
 		queueid = 0;
 		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 			if (rte_lcore_is_enabled(lcore_id) == 0)
 				continue;
+			qconf = &lcore_conf[lcore_id];
 
 			if (numa_on)
 				socketid =
@@ -1018,21 +1019,25 @@ l3fwd_poll_resource_setup(void)
 			else
 				socketid = 0;
 
-			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
-			fflush(stdout);
-
-			txconf = &dev_info.default_txconf;
-			txconf->offloads = local_port_conf.txmode.offloads;
-			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
-						     socketid, txconf);
-			if (ret < 0)
-				rte_exit(EXIT_FAILURE,
-					"rte_eth_tx_queue_setup: err=%d, "
-					"port=%d\n", ret, portid);
+			for (queue = 0; queue < qconf->n_rx_queue; queue++) {
+				queueid = qconf->rx_queue_list[queue].queue_id;
+				printf("txq=%u,%d,%d ",
+					lcore_id, queueid, socketid);
+				fflush(stdout);
+
+				txconf = &dev_info.default_txconf;
+				txconf->offloads =
+					local_port_conf.txmode.offloads;
+				ret = rte_eth_tx_queue_setup
+					(portid, queueid, nb_txd,
+					 socketid, txconf);
+				if (ret < 0)
+					rte_exit(EXIT_FAILURE,
+						"rte_eth_tx_queue_setup: err=%d, "
+						"port=%d\n", ret, portid);
+			}
 
-			qconf = &lcore_conf[lcore_id];
 			qconf->tx_queue_id[portid] = queueid;
-			queueid++;
 
 			qconf->tx_port_id[qconf->n_tx_port] = portid;
 			qconf->n_tx_port++;
-- 
2.17.1
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-04  7:28 [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore Leyi Rong
@ 2020-11-04  8:14 ` David Marchand
  2020-11-04  8:34   ` Rong, Leyi
  0 siblings, 1 reply; 13+ messages in thread
From: David Marchand @ 2020-11-04  8:14 UTC (permalink / raw)
  To: Leyi Rong; +Cc: Qi Zhang, dev
If I count well, this is the v3 of the patch.
Please version your patches.
On Wed, Nov 4, 2020 at 8:52 AM Leyi Rong <leyi.rong@intel.com> wrote:
>
> Currently, l3fwd doesn't support multiple Tx queues, while
> multiple Rx queues is supported.
> To improve the throughput performance when polling multiple
> queues, this patch enables multiple Tx queues handling on a lcore.
Why would there be a gain in using multiple txq?
Is it with hw txq? sw txq? .. ?
-- 
David Marchand
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-04  8:14 ` David Marchand
@ 2020-11-04  8:34   ` Rong, Leyi
  2020-11-04  8:43     ` David Marchand
  0 siblings, 1 reply; 13+ messages in thread
From: Rong, Leyi @ 2020-11-04  8:34 UTC (permalink / raw)
  To: David Marchand; +Cc: Zhang, Qi Z, dev
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, November 4, 2020 4:14 PM
> To: Rong, Leyi <leyi.rong@intel.com>
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
> 
> If I count well, this is the v3 of the patch.
> Please version your patches.
The previous versions are set to superseded. As nothing changed in the content
of those versions, can we start versioning from this one?
> 
> On Wed, Nov 4, 2020 at 8:52 AM Leyi Rong <leyi.rong@intel.com> wrote:
> >
> > Currently, l3fwd doesn't support multiple Tx queues, while multiple Rx
> > queues is supported.
> > To improve the throughput performance when polling multiple queues,
> > this patch enables multiple Tx queues handling on a lcore.
> 
> Why would there be a gain in using multiple txq?
> Is it with hw txq? sw txq? .. ?
> 
> 
> --
> David Marchand
As there is always a per-queue throughput limit, in some l3fwd performance test cases
the result is limited by that per-queue throughput limit. With multiple Tx queues enabled, the
per-queue throughput limit can be eliminated if the CPU core is not the bottleneck.
Leyi
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-04  8:34   ` Rong, Leyi
@ 2020-11-04  8:43     ` David Marchand
  2020-11-04  9:04       ` Rong, Leyi
  0 siblings, 1 reply; 13+ messages in thread
From: David Marchand @ 2020-11-04  8:43 UTC (permalink / raw)
  To: Rong, Leyi; +Cc: Zhang, Qi Z, dev
On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com> wrote:
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Wednesday, November 4, 2020 4:14 PM
> > To: Rong, Leyi <leyi.rong@intel.com>
> > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
> >
> > If I count well, this is the v3 of the patch.
> > Please version your patches.
>
> The previous versions are set to superseded. As nothing changes with content
> on those versions, can start from this version?
The commitlog changes even if the code itself did not change, so this
is a different patch.
Different patches mean different versions.
This shows that some work happened since the v1 submission.
> As there always has thoughput limit for per queue, on some performance test case by using l3fwd,
> the result will limited by the per queue thoughput limit. With multiple Tx queue enabled, the per
> queue thoughput limit can be eliminated if the CPU core is not the bottleneck.
Ah interesting.
Which nic has such limitations?
How much of an improvement can be expected from this?
-- 
David Marchand
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-04  8:43     ` David Marchand
@ 2020-11-04  9:04       ` Rong, Leyi
  2020-11-05  7:14         ` Jerin Jacob
  0 siblings, 1 reply; 13+ messages in thread
From: Rong, Leyi @ 2020-11-04  9:04 UTC (permalink / raw)
  To: David Marchand; +Cc: Zhang, Qi Z, dev
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, November 4, 2020 4:43 PM
> To: Rong, Leyi <leyi.rong@intel.com>
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
> 
> On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > -----Original Message-----
> > > From: David Marchand <david.marchand@redhat.com>
> > > Sent: Wednesday, November 4, 2020 4:14 PM
> > > To: Rong, Leyi <leyi.rong@intel.com>
> > > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a
> > > lcore
> > >
> > > If I count well, this is the v3 of the patch.
> > > Please version your patches.
> >
> > The previous versions are set to superseded. As nothing changes with
> > content on those versions, can start from this version?
> 
> The commitlog changes even if the code itself did not change, so this is a
> different patch.
> Different patches mean different versions.
> This shows that some work happened since the v1 submission.
> 
Agreed.
> 
> > As there always has thoughput limit for per queue, on some performance
> > test case by using l3fwd, the result will limited by the per queue
> > thoughput limit. With multiple Tx queue enabled, the per queue thoughput
> limit can be eliminated if the CPU core is not the bottleneck.
> 
> Ah interesting.
> Which nic has such limitations?
> How much of an improvement can be expected from this?
> 
> 
> --
> David Marchand
The issue was initially found on an XXV710 25Gb NIC, but I suppose it can happen on more NICs,
as the per-core performance bound of a high-end CPU is higher than the per-queue performance bound of many NICs (except 100Gb and above).
The improvement can be about 1.8X in that case @1t2q.
Leyi
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-04  9:04       ` Rong, Leyi
@ 2020-11-05  7:14         ` Jerin Jacob
  2020-11-05  9:24           ` Rong, Leyi
  0 siblings, 1 reply; 13+ messages in thread
From: Jerin Jacob @ 2020-11-05  7:14 UTC (permalink / raw)
  To: Rong, Leyi; +Cc: David Marchand, Zhang, Qi Z, dev
On Wed, Nov 4, 2020 at 2:34 PM Rong, Leyi <leyi.rong@intel.com> wrote:
>
>
> > -----Original Message-----
> > From: David Marchand <david.marchand@redhat.com>
> > Sent: Wednesday, November 4, 2020 4:43 PM
> > To: Rong, Leyi <leyi.rong@intel.com>
> > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
> >
> > On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > > -----Original Message-----
> > > > From: David Marchand <david.marchand@redhat.com>
> > > > Sent: Wednesday, November 4, 2020 4:14 PM
> > > > To: Rong, Leyi <leyi.rong@intel.com>
> > > > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > > > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a
> > > > lcore
> > > >
> > > > If I count well, this is the v3 of the patch.
> > > > Please version your patches.
> > >
> > > The previous versions are set to superseded. As nothing changes with
> > > content on those versions, can start from this version?
> >
> > The commitlog changes even if the code itself did not change, so this is a
> > different patch.
> > Different patches mean different versions.
> > This shows that some work happened since the v1 submission.
> >
>
> Agreed.
> >
> > > As there always has thoughput limit for per queue, on some performance
> > > test case by using l3fwd, the result will limited by the per queue
> > > thoughput limit. With multiple Tx queue enabled, the per queue thoughput
> > limit can be eliminated if the CPU core is not the bottleneck.
> >
> > Ah interesting.
> > Which nic has such limitations?
> > How much of an improvement can be expected from this?
> >
> >
> > --
> > David Marchand
>
> The initial found was on XXV710 25Gb NIC, but suppose such issue can happen on more NICs
> as the high-end CPU per core boundary is higher than many NICs(except 100Gb and above) per queue performance boundary.
> The improvement can be about 1.8X with that case@1t2q.
As far as I understand, the Current l3fwd Tx queue creation is like this:
If the app has N cores and M ports then l3fwd creates, N x M Tx queues in total,
What will be new values based on this patch?
Does this patch have any regression in case the NIC queues are able to cope
with the throughput limit from the CPU?
>
> Leyi
>
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-05  7:14         ` Jerin Jacob
@ 2020-11-05  9:24           ` Rong, Leyi
  2021-03-24 17:23             ` Thomas Monjalon
  0 siblings, 1 reply; 13+ messages in thread
From: Rong, Leyi @ 2020-11-05  9:24 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: David Marchand, Zhang, Qi Z, dev
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Thursday, November 5, 2020 3:15 PM
> To: Rong, Leyi <leyi.rong@intel.com>
> Cc: David Marchand <david.marchand@redhat.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> Subject: Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on
> a lcore
> 
> On Wed, Nov 4, 2020 at 2:34 PM Rong, Leyi <leyi.rong@intel.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From: David Marchand <david.marchand@redhat.com>
> > > Sent: Wednesday, November 4, 2020 4:43 PM
> > > To: Rong, Leyi <leyi.rong@intel.com>
> > > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues on a
> > > lcore
> > >
> > > On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > > > -----Original Message-----
> > > > > From: David Marchand <david.marchand@redhat.com>
> > > > > Sent: Wednesday, November 4, 2020 4:14 PM
> > > > > To: Rong, Leyi <leyi.rong@intel.com>
> > > > > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; dev <dev@dpdk.org>
> > > > > Subject: Re: [PATCH] examples/l3fwd: enable multiple Tx queues
> > > > > on a lcore
> > > > >
> > > > > If I count well, this is the v3 of the patch.
> > > > > Please version your patches.
> > > >
> > > > The previous versions are set to superseded. As nothing changes
> > > > with content on those versions, can start from this version?
> > >
> > > The commitlog changes even if the code itself did not change, so
> > > this is a different patch.
> > > Different patches mean different versions.
> > > This shows that some work happened since the v1 submission.
> > >
> >
> > Agreed.
> > >
> > > > As there always has thoughput limit for per queue, on some
> > > > performance test case by using l3fwd, the result will limited by
> > > > the per queue thoughput limit. With multiple Tx queue enabled, the
> > > > per queue thoughput
> > > limit can be eliminated if the CPU core is not the bottleneck.
> > >
> > > Ah interesting.
> > > Which nic has such limitations?
> > > How much of an improvement can be expected from this?
> > >
> > >
> > > --
> > > David Marchand
> >
> > The initial found was on XXV710 25Gb NIC, but suppose such issue can
> > happen on more NICs as the high-end CPU per core boundary is higher than
> many NICs(except 100Gb and above) per queue performance boundary.
> > The improvement can be about 1.8X with that case@1t2q.
> 
> As far as I understand, the Current l3fwd Tx queue creation is like this:
> If the app has N cores and M ports then l3fwd creates, N x M Tx queues in total,
> What will be new values based on this patch?
> 
Hi Jacob,
The total number of queues equals the number of queues per port multiplied by the number of ports.
Taking #l3fwd -l 5,6 -n 6 -- -p 0x3 --config '(0,0,5),(0,1,5),(1,0,6),(1,1,6)' as an example:
with this patch applied, a total of 2x2=4 Tx queues can be polled, while only
1x2=2 Tx queues could be used before.
> Does this patch has any regression in case the NIC queues able to cope up with
> the throughput limit from CPU.
> 
The regression tests relevant to l3fwd passed with this patch, with no obvious drop in results
in other cases.
> 
> >
> > Leyi
> >
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-05  9:24           ` Rong, Leyi
@ 2021-03-24 17:23             ` Thomas Monjalon
  2021-03-25  5:38               ` Rong, Leyi
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2021-03-24 17:23 UTC (permalink / raw)
  To: Jerin Jacob, David Marchand, Rong, Leyi
  Cc: dev, Zhang, Qi Z, bruce.richardson, konstantin.ananyev
05/11/2020 10:24, Rong, Leyi:
> From: Jerin Jacob <jerinjacobk@gmail.com>
> > On Wed, Nov 4, 2020 at 2:34 PM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > From: David Marchand <david.marchand@redhat.com>
> > > > On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > > > As there always has thoughput limit for per queue, on some
> > > > > performance test case by using l3fwd, the result will limited by
> > > > > the per queue thoughput limit. With multiple Tx queue enabled, the
> > > > > per queue thoughput
> > > > > limit can be eliminated if the CPU core is not the bottleneck.
> > > >
> > > > Ah interesting.
> > > > Which nic has such limitations?
> > > > How much of an improvement can be expected from this?
> > >
> > > The initial found was on XXV710 25Gb NIC, but suppose such issue can
> > > happen on more NICs as the high-end CPU per core boundary is higher than
> > > many NICs(except 100Gb and above) per queue performance boundary.
> > > The improvement can be about 1.8X with that case@1t2q.
> > 
> > As far as I understand, the Current l3fwd Tx queue creation is like this:
> > If the app has N cores and M ports then l3fwd creates, N x M Tx queues in total,
> > What will be new values based on this patch?
Thank you Jerin for providing some info missing in the description of the patch.
> Hi Jacob,
> 
> Total queues number equals to queues per port multiply port number.
> Just take #l3fwd -l 5,6 -n 6 -- -p 0x3 --config '(0,0,5),(0,1,5),(1,0,6),(1,1,6)' as example, 
> With this patch appied, totally 2x2=4 tx queues can be polled, while only
> 1x2=2 tx queues can be used before.
It does not answer the above question about N x M.
> > Does this patch has any regression in case the NIC queues able to cope up with
> > the throughput limit from CPU.
> 
> Regression test relevant with l3fwd passed with this patch, no obvious result drop 
> on other cases.
It does not answer the general question for all the drivers you did not test.
As you probably noticed, this patch is blocked for months
because it is not properly explained.
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2021-03-24 17:23             ` Thomas Monjalon
@ 2021-03-25  5:38               ` Rong, Leyi
  2021-03-25  8:10                 ` Thomas Monjalon
  0 siblings, 1 reply; 13+ messages in thread
From: Rong, Leyi @ 2021-03-25  5:38 UTC (permalink / raw)
  To: Thomas Monjalon, Jerin Jacob, David Marchand
  Cc: dev, Zhang, Qi Z, Richardson, Bruce, Ananyev, Konstantin
> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Thursday, March 25, 2021 1:23 AM
> To: Jerin Jacob <jerinjacobk@gmail.com>; David Marchand
> <david.marchand@redhat.com>; Rong, Leyi <leyi.rong@intel.com>
> Cc: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>
> Subject: Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on
> a lcore
> 
> 05/11/2020 10:24, Rong, Leyi:
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > On Wed, Nov 4, 2020 at 2:34 PM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > > From: David Marchand <david.marchand@redhat.com>
> > > > > On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com>
> wrote:
> > > > > > As there always has thoughput limit for per queue, on some
> > > > > > performance test case by using l3fwd, the result will limited
> > > > > > by the per queue thoughput limit. With multiple Tx queue
> > > > > > enabled, the per queue thoughput limit can be eliminated if
> > > > > > the CPU core is not the bottleneck.
> > > > >
> > > > > Ah interesting.
> > > > > Which nic has such limitations?
> > > > > How much of an improvement can be expected from this?
> > > >
> > > > The initial found was on XXV710 25Gb NIC, but suppose such issue
> > > > can happen on more NICs as the high-end CPU per core boundary is
> > > > higher than many NICs(except 100Gb and above) per queue performance
> boundary.
> > > > The improvement can be about 1.8X with that case@1t2q.
> > >
> > > As far as I understand, the Current l3fwd Tx queue creation is like this:
> > > If the app has N cores and M ports then l3fwd creates, N x M Tx
> > > queues in total, What will be new values based on this patch?
> 
> Thank you Jerin for providing some info missing in the description of the patch.
> 
> > Hi Jacob,
> >
> > Total queues number equals to queues per port multiply port number.
> > Just take #l3fwd -l 5,6 -n 6 -- -p 0x3 --config
> > '(0,0,5),(0,1,5),(1,0,6),(1,1,6)' as example, With this patch appied,
> > totally 2x2=4 tx queues can be polled, while only
> > 1x2=2 tx queues can be used before.
> 
> It does not reply above question with N x M.
> 
> > > Does this patch has any regression in case the NIC queues able to
> > > cope up with the throughput limit from CPU.
> >
> > Regression test relevant with l3fwd passed with this patch, no obvious
> > result drop on other cases.
> 
> It does not reply the general question for all drivers you did not test.
> 
> As you probably noticed, this patch is blocked for months because it is not
> properly explained.
> 
Hi Thomas,
This patch can be abandoned, as agreed with Konstantin months ago. Its state can be updated to superseded on patchwork. Thanks!
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2021-03-25  5:38               ` Rong, Leyi
@ 2021-03-25  8:10                 ` Thomas Monjalon
  0 siblings, 0 replies; 13+ messages in thread
From: Thomas Monjalon @ 2021-03-25  8:10 UTC (permalink / raw)
  To: Rong, Leyi
  Cc: Jerin Jacob, David Marchand, dev, Zhang, Qi Z, Richardson, Bruce,
	Ananyev, Konstantin
25/03/2021 06:38, Rong, Leyi:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 05/11/2020 10:24, Rong, Leyi:
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > On Wed, Nov 4, 2020 at 2:34 PM Rong, Leyi <leyi.rong@intel.com> wrote:
> > > > > From: David Marchand <david.marchand@redhat.com>
> > > > > > On Wed, Nov 4, 2020 at 9:34 AM Rong, Leyi <leyi.rong@intel.com>
> > wrote:
> > > > > > > As there always has thoughput limit for per queue, on some
> > > > > > > performance test case by using l3fwd, the result will limited
> > > > > > > by the per queue thoughput limit. With multiple Tx queue
> > > > > > > enabled, the per queue thoughput limit can be eliminated if
> > > > > > > the CPU core is not the bottleneck.
> > > > > >
> > > > > > Ah interesting.
> > > > > > Which nic has such limitations?
> > > > > > How much of an improvement can be expected from this?
> > > > >
> > > > > The initial found was on XXV710 25Gb NIC, but suppose such issue
> > > > > can happen on more NICs as the high-end CPU per core boundary is
> > > > > higher than many NICs(except 100Gb and above) per queue performance
> > boundary.
> > > > > The improvement can be about 1.8X with that case@1t2q.
> > > >
> > > > As far as I understand, the Current l3fwd Tx queue creation is like this:
> > > > If the app has N cores and M ports then l3fwd creates, N x M Tx
> > > > queues in total, What will be new values based on this patch?
> > 
> > Thank you Jerin for providing some info missing in the description of the patch.
> > 
> > > Hi Jacob,
> > >
> > > Total queues number equals to queues per port multiply port number.
> > > Just take #l3fwd -l 5,6 -n 6 -- -p 0x3 --config
> > > '(0,0,5),(0,1,5),(1,0,6),(1,1,6)' as example, With this patch appied,
> > > totally 2x2=4 tx queues can be polled, while only
> > > 1x2=2 tx queues can be used before.
> > 
> > It does not reply above question with N x M.
> > 
> > > > Does this patch has any regression in case the NIC queues able to
> > > > cope up with the throughput limit from CPU.
> > >
> > > Regression test relevant with l3fwd passed with this patch, no obvious
> > > result drop on other cases.
> > 
> > It does not reply the general question for all drivers you did not test.
> > 
> > As you probably noticed, this patch is blocked for months because it is not
> > properly explained.
> > 
> 
> Hi Thomas,
> 
> This patch can be abandoned after synced with Konstantin months ago. And update the state to superseded on patchwork, Thanks!
"Superseded" means a newer version has been sent.
I will change to "Rejected".
^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
  2020-11-02  8:12 Leyi Rong
@ 2020-11-02  8:44 ` David Marchand
  0 siblings, 0 replies; 13+ messages in thread
From: David Marchand @ 2020-11-02  8:44 UTC (permalink / raw)
  To: Leyi Rong; +Cc: Qi Zhang, dev
On Mon, Nov 2, 2020 at 9:36 AM Leyi Rong <leyi.rong@intel.com> wrote:
>
> This patch enable multiple Tx queues handling on a lcore.
This is the "What".
The "Why" should come first, so please explain what makes you think we
need this patch.
Thanks.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
@ 2020-11-02  8:12 Leyi Rong
  2020-11-02  8:44 ` David Marchand
  0 siblings, 1 reply; 13+ messages in thread
From: Leyi Rong @ 2020-11-02  8:12 UTC (permalink / raw)
  To: qi.z.zhang; +Cc: dev, Leyi Rong
This patch enables handling of multiple Tx queues on a lcore.
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 examples/l3fwd/l3fwd_common.h        |  6 ++---
 examples/l3fwd/l3fwd_em.c            |  2 +-
 examples/l3fwd/l3fwd_em_hlm.h        |  4 ++--
 examples/l3fwd/l3fwd_em_sequential.h |  5 +++--
 examples/l3fwd/l3fwd_lpm.c           |  2 +-
 examples/l3fwd/l3fwd_lpm_sse.h       |  5 +++--
 examples/l3fwd/l3fwd_sse.h           |  4 ++--
 examples/l3fwd/main.c                | 33 ++++++++++++++++------------
 8 files changed, 34 insertions(+), 27 deletions(-)
diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
index 7d83ff641a..ab114af8c6 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -178,8 +178,8 @@ static const struct {
 };
 
 static __rte_always_inline void
-send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
-		uint32_t num)
+send_packetsx4(struct lcore_conf *qconf, uint16_t port, uint16_t queueid,
+	       struct rte_mbuf *m[], uint32_t num)
 {
 	uint32_t len, j, n;
 
@@ -190,7 +190,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
 	 * then send them straightway.
 	 */
 	if (num >= MAX_TX_BURST && len == 0) {
-		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
+		n = rte_eth_tx_burst(port, queueid, m, num);
 		if (unlikely(n < num)) {
 			do {
 				rte_pktmbuf_free(m[n]);
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 9996bfba34..8fddb8d55d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -686,7 +686,7 @@ em_main_loop(__rte_unused void *dummy)
 
 #if defined RTE_ARCH_X86 || defined __ARM_NEON
 			l3fwd_em_send_packets(nb_rx, pkts_burst,
-							portid, qconf);
+							portid, queueid, qconf);
 #else
 			l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 278707c18c..d08f393eed 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -183,7 +183,7 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+		uint16_t portid, uint16_t queueid, struct lcore_conf *qconf)
 {
 	int32_t i, j, pos;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -238,7 +238,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 	for (; j < nb_rx; j++)
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 
 }
 
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index 6170052cf8..2d7071b0c9 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -74,7 +74,8 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		      uint16_t portid, uint16_t queueid,
+		      struct lcore_conf *qconf)
 {
 	int32_t i, j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -93,7 +94,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 /*
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 3dcf1fef18..8153150c37 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -243,7 +243,7 @@ lpm_main_loop(__rte_unused void *dummy)
 #if defined RTE_ARCH_X86 || defined __ARM_NEON \
 			 || defined RTE_ARCH_PPC_64
 			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
-						portid, qconf);
+						portid, queueid, qconf);
 #else
 			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..cd68179b76 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -83,7 +83,8 @@ processx4_step2(const struct lcore_conf *qconf,
  */
 static inline void
 l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		       uint16_t portid, uint16_t queueid,
+		       struct lcore_conf *qconf)
 {
 	int32_t j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -114,7 +115,7 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		j++;
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 #endif /* __L3FWD_LPM_SSE_H__ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index bb565ed546..f91580a4ce 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -125,7 +125,7 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
  */
 static __rte_always_inline void
 send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
-		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+		uint16_t dst_port[MAX_PKT_BURST], uint16_t queueid, int nb_rx)
 {
 	int32_t k;
 	int j = 0;
@@ -220,7 +220,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 		k = pnum[j];
 
 		if (likely(pn != BAD_PORT))
-			send_packetsx4(qconf, pn, pkts_burst + j, k);
+			send_packetsx4(qconf, pn, queueid, pkts_burst + j, k);
 		else
 			for (m = j; m != j + k; m++)
 				rte_pktmbuf_free(pkts_burst[m]);
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index d62dec434c..d3c8e5cb5d 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -935,7 +935,7 @@ l3fwd_poll_resource_setup(void)
 		fflush(stdout);
 
 		nb_rx_queue = get_port_n_rx_queues(portid);
-		n_tx_queue = nb_lcores;
+		n_tx_queue = nb_rx_queue;
 		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
 			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
 		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
@@ -1011,6 +1011,7 @@ l3fwd_poll_resource_setup(void)
 		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 			if (rte_lcore_is_enabled(lcore_id) == 0)
 				continue;
+			qconf = &lcore_conf[lcore_id];
 
 			if (numa_on)
 				socketid =
@@ -1018,21 +1019,25 @@ l3fwd_poll_resource_setup(void)
 			else
 				socketid = 0;
 
-			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
-			fflush(stdout);
-
-			txconf = &dev_info.default_txconf;
-			txconf->offloads = local_port_conf.txmode.offloads;
-			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
-						     socketid, txconf);
-			if (ret < 0)
-				rte_exit(EXIT_FAILURE,
-					"rte_eth_tx_queue_setup: err=%d, "
-					"port=%d\n", ret, portid);
+			for (queue = 0; queue < qconf->n_rx_queue; queue++) {
+				queueid = qconf->rx_queue_list[queue].queue_id;
+				printf("txq=%u,%d,%d ",
+					lcore_id, queueid, socketid);
+				fflush(stdout);
+
+				txconf = &dev_info.default_txconf;
+				txconf->offloads =
+					local_port_conf.txmode.offloads;
+				ret = rte_eth_tx_queue_setup
+					(portid, queueid, nb_txd,
+					 socketid, txconf);
+				if (ret < 0)
+					rte_exit(EXIT_FAILURE,
+						"rte_eth_tx_queue_setup: err=%d, "
+						"port=%d\n", ret, portid);
+			}
 
-			qconf = &lcore_conf[lcore_id];
 			qconf->tx_queue_id[portid] = queueid;
-			queueid++;
 
 			qconf->tx_port_id[qconf->n_tx_port] = portid;
 			qconf->n_tx_port++;
-- 
2.17.1
^ permalink raw reply	[flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore
@ 2020-11-02  5:29 Leyi Rong
  0 siblings, 0 replies; 13+ messages in thread
From: Leyi Rong @ 2020-11-02  5:29 UTC (permalink / raw)
  To: qi.z.zhang; +Cc: dev, Leyi Rong
This patch enables multiple Tx queues handling on a lcore.
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 examples/l3fwd/l3fwd_common.h        |  6 ++---
 examples/l3fwd/l3fwd_em.c            |  2 +-
 examples/l3fwd/l3fwd_em_hlm.h        |  4 ++--
 examples/l3fwd/l3fwd_em_sequential.h |  5 +++--
 examples/l3fwd/l3fwd_lpm.c           |  2 +-
 examples/l3fwd/l3fwd_lpm_sse.h       |  5 +++--
 examples/l3fwd/l3fwd_sse.h           |  4 ++--
 examples/l3fwd/main.c                | 33 ++++++++++++++++------------
 8 files changed, 34 insertions(+), 27 deletions(-)
diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
index 7d83ff641..ab114af8c 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -178,8 +178,8 @@ static const struct {
 };
 
 static __rte_always_inline void
-send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
-		uint32_t num)
+send_packetsx4(struct lcore_conf *qconf, uint16_t port, uint16_t queueid,
+	       struct rte_mbuf *m[], uint32_t num)
 {
 	uint32_t len, j, n;
 
@@ -190,7 +190,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
 	 * then send them straightway.
 	 */
 	if (num >= MAX_TX_BURST && len == 0) {
-		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
+		n = rte_eth_tx_burst(port, queueid, m, num);
 		if (unlikely(n < num)) {
 			do {
 				rte_pktmbuf_free(m[n]);
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index fdbee70b4..e5332455d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -687,7 +687,7 @@ em_main_loop(__rte_unused void *dummy)
 
 #if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON
 			l3fwd_em_send_packets(nb_rx, pkts_burst,
-							portid, qconf);
+							portid, queueid, qconf);
 #else
 			l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 79812716c..1e60e4f84 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -183,7 +183,7 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+		uint16_t portid, uint16_t queueid, struct lcore_conf *qconf)
 {
 	int32_t i, j, pos;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -238,7 +238,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 	for (; j < nb_rx; j++)
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 
 }
 
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index b231b9994..39a4c2fca 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -74,7 +74,8 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  */
 static inline void
 l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		      uint16_t portid, uint16_t queueid,
+		      struct lcore_conf *qconf)
 {
 	int32_t i, j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -93,7 +94,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 /*
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 91eb74272..8be59ac58 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -243,7 +243,7 @@ lpm_main_loop(__rte_unused void *dummy)
 #if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \
 			 || defined RTE_ARCH_PPC_64
 			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
-						portid, qconf);
+						portid, queueid, qconf);
 #else
 			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d..cd68179b7 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -83,7 +83,8 @@ processx4_step2(const struct lcore_conf *qconf,
  */
 static inline void
 l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-			uint16_t portid, struct lcore_conf *qconf)
+		       uint16_t portid, uint16_t queueid,
+		       struct lcore_conf *qconf)
 {
 	int32_t j;
 	uint16_t dst_port[MAX_PKT_BURST];
@@ -114,7 +115,7 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		j++;
 	}
 
-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+	send_packets_multi(qconf, pkts_burst, dst_port, queueid, nb_rx);
 }
 
 #endif /* __L3FWD_LPM_SSE_H__ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index bb565ed54..f91580a4c 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -125,7 +125,7 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
  */
 static __rte_always_inline void
 send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
-		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+		uint16_t dst_port[MAX_PKT_BURST], uint16_t queueid, int nb_rx)
 {
 	int32_t k;
 	int j = 0;
@@ -220,7 +220,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 		k = pnum[j];
 
 		if (likely(pn != BAD_PORT))
-			send_packetsx4(qconf, pn, pkts_burst + j, k);
+			send_packetsx4(qconf, pn, queueid, pkts_burst + j, k);
 		else
 			for (m = j; m != j + k; m++)
 				rte_pktmbuf_free(pkts_burst[m]);
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 24ede4290..0fb5642d3 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -941,7 +941,7 @@ l3fwd_poll_resource_setup(void)
 		fflush(stdout);
 
 		nb_rx_queue = get_port_n_rx_queues(portid);
-		n_tx_queue = nb_lcores;
+		n_tx_queue = nb_rx_queue;
 		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
 			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
 		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
@@ -1017,6 +1017,7 @@ l3fwd_poll_resource_setup(void)
 		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 			if (rte_lcore_is_enabled(lcore_id) == 0)
 				continue;
+			qconf = &lcore_conf[lcore_id];
 
 			if (numa_on)
 				socketid =
@@ -1024,21 +1025,25 @@ l3fwd_poll_resource_setup(void)
 			else
 				socketid = 0;
 
-			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
-			fflush(stdout);
-
-			txconf = &dev_info.default_txconf;
-			txconf->offloads = local_port_conf.txmode.offloads;
-			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
-						     socketid, txconf);
-			if (ret < 0)
-				rte_exit(EXIT_FAILURE,
-					"rte_eth_tx_queue_setup: err=%d, "
-					"port=%d\n", ret, portid);
+			for (queue = 0; queue < qconf->n_rx_queue; queue++) {
+				queueid = qconf->rx_queue_list[queue].queue_id;
+				printf("txq=%u,%d,%d ",
+					lcore_id, queueid, socketid);
+				fflush(stdout);
+
+				txconf = &dev_info.default_txconf;
+				txconf->offloads =
+					local_port_conf.txmode.offloads;
+				ret = rte_eth_tx_queue_setup
+					(portid, queueid, nb_txd,
+					 socketid, txconf);
+				if (ret < 0)
+					rte_exit(EXIT_FAILURE,
+						"rte_eth_tx_queue_setup: err=%d, "
+						"port=%d\n", ret, portid);
+			}
 
-			qconf = &lcore_conf[lcore_id];
 			qconf->tx_queue_id[portid] = queueid;
-			queueid++;
 
 			qconf->tx_port_id[qconf->n_tx_port] = portid;
 			qconf->n_tx_port++;
-- 
2.17.1
^ permalink raw reply	[flat|nested] 13+ messages in thread
end of thread, other threads:[~2021-03-25  8:10 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-04  7:28 [dpdk-dev] [PATCH] examples/l3fwd: enable multiple Tx queues on a lcore Leyi Rong
2020-11-04  8:14 ` David Marchand
2020-11-04  8:34   ` Rong, Leyi
2020-11-04  8:43     ` David Marchand
2020-11-04  9:04       ` Rong, Leyi
2020-11-05  7:14         ` Jerin Jacob
2020-11-05  9:24           ` Rong, Leyi
2021-03-24 17:23             ` Thomas Monjalon
2021-03-25  5:38               ` Rong, Leyi
2021-03-25  8:10                 ` Thomas Monjalon
  -- strict thread matches above, loose matches on Subject: below --
2020-11-02  8:12 Leyi Rong
2020-11-02  8:44 ` David Marchand
2020-11-02  5:29 Leyi Rong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).